Common function definitions:

plot_correlation <- function(dataset) {
  #' Calculate the correlation among columns in the dataset
  #' and plot a heat diagram with the results
  #' @param dataset Data.frame of numeric columns to analyse
  #' @return The correlation matrix computed by cor()
  correlation_matrix <- cor(dataset)

  # Draw the full (non-triangular) heat map, keeping the original column
  # order; coefficients are shown as percentages to reduce clutter.
  corrplot(correlation_matrix,
           type = "full",
           order = "original",
           tl.cex = .6,
           addCoefasPercent = TRUE,
           col = brewer.pal(n = 8, name = "RdYlBu"))

  return(correlation_matrix)
}

# General EDA
describe_df <- function(name, df) {
  #' Print an exploratory summary of a data.frame: a header, its structure,
  #' descriptive statistics and the total NA count.
  #'
  #' @param name Label printed in the header
  #' @param df Data.frame to describe
  #' @return The NA-count message string (last expression, auto-printed
  #'   when called at top level, matching the original behavior)
  # BUG FIX: inside a function, bare paste()/summary() results are silently
  # discarded (only str() prints as a side effect). Print them explicitly.
  print(paste("EDA for ", name, ":"))
  str(df)
  print(summary(df))
  paste("Number of NA values: ", sum(is.na(df)))
}

#### Preprocessing functions
remove_highly_correlated_features <- function(df) {
  #' Drop features whose pairwise correlation exceeds 0.9, keeping one
  #' representative of each correlated group (via caret::findCorrelation).
  #'
  #' @param df Data.frame of numeric columns
  #' @return df with the highly correlated columns removed
  corr_data <- cor(df)
  # BUG FIX: `exact` expects a logical; the original passed ncol(corr_data),
  # a number silently coerced to TRUE for any non-empty matrix. Say so
  # explicitly to request the exact (re-evaluated) column-removal algorithm.
  high_corr_cols <- findCorrelation(corr_data, cutoff = 0.9,
                                    verbose = FALSE, names = FALSE,
                                    exact = TRUE)
  # Only drop columns when there is something to drop; keeps the no-op
  # case explicit and safe.
  if (length(high_corr_cols) > 0) {
    df[high_corr_cols] <- NULL
  }
  return(df)
}

remove_nzv <- function(df) {
  #' Remove near-zero-variance features (via caret::nearZeroVar).
  #'
  #' @param df Data.frame to filter
  #' @return df without the near-zero-variance columns; df unchanged when
  #'   no such columns exist
  # nearZeroVar() with saveMetrics = FALSE returns a vector of column indices
  nzv <- nearZeroVar(df, saveMetrics = FALSE)
  str(nzv)

  # BUG FIX: when no column qualifies, nzv is integer(0) and df[,-integer(0)]
  # selects ZERO columns instead of all of them. Return df unchanged instead.
  if (length(nzv) == 0) {
    return(df)
  }

  # create a new data set and remove near zero variance features;
  # drop = FALSE guards against collapsing to a vector when one column remains
  df_new <- df[, -nzv, drop = FALSE]
  str(df_new)
  return(df_new)
}


#### Execute in parallel
run_in_parallel <- function(FUN, ...) {
  #' Run FUN(...) with a doParallel cluster registered for the duration
  #' of the call.
  #'
  #' @param FUN Function to execute (e.g. caret::train); backends that
  #'   support foreach/doParallel will pick up the registered cluster
  #' @param ... Arguments forwarded to FUN
  #' @return The result of FUN(...)
  # Find how many cores are on your machine
  num_cores <- detectCores() # Result = Typically 4 to 6

  # Leave two cores free for other processes, but never request fewer than
  # one worker (the original `num_cores - 2` errors on 1-2 core machines).
  cl <- makeCluster(max(1L, num_cores - 2L))

  # BUG FIX: if FUN(...) throws, the original leaked the cluster because
  # stopCluster() was never reached. on.exit() guarantees cleanup.
  on.exit(stopCluster(cl), add = TRUE)

  # Register Cluster
  registerDoParallel(cl)

  result <- FUN(...)
  return(result)
}

svm_train <- function(dataF, testing_data) {
  #' Fit an SVM (e1071) predicting iphonesentiment and evaluate it on a
  #' hold-out set.
  #'
  #' @param dataF Training data.frame containing an iphonesentiment column
  #' @param testing_data Hold-out data.frame used for evaluation
  #' @return A list with the fitted model ("model") and the postResample
  #'   evaluation metrics ("post_resample")
  library(e1071)
  set.seed(641386945)
  # Timing wraps the fit; the model is assigned as a side effect.
  system.time(
    fitted_model <- run_in_parallel(svm, iphonesentiment ~ ., data = dataF)
  )
  predictions <- predict(fitted_model, testing_data)
  metrics <- postResample(predictions, testing_data$iphonesentiment)
  list("model" = fitted_model, "post_resample" = metrics)
}

knn_train <- function(dataF, testing_data) {
  #' Fit a k-nearest-neighbours model (kknn) predicting iphonesentiment and
  #' evaluate it on a hold-out set.
  #'
  #' @param dataF Training data.frame containing an iphonesentiment column
  #' @param testing_data Hold-out data.frame used for evaluation
  #' @return A list with the fitted model ("model") and the postResample
  #'   evaluation metrics ("post_resample")
  library(kknn)
  set.seed(641386945)
  # Timing wraps the fit; the model is assigned as a side effect.
  system.time(
    fitted_model <- run_in_parallel(train.kknn, iphonesentiment ~ ., data = dataF)
  )
  predictions <- predict(fitted_model, testing_data)
  metrics <- postResample(predictions, testing_data$iphonesentiment)
  list("model" = fitted_model, "post_resample" = metrics)
}

caret_train <- function(dataF, testing_data, model_name, fitCtrl) {
  #' Fit a caret model predicting iphonesentiment and evaluate it on a
  #' hold-out set.
  #'
  #' @param dataF Training data.frame containing an iphonesentiment column
  #' @param testing_data Hold-out data.frame used for evaluation
  #' @param model_name caret method string (e.g. "rf", "C5.0")
  #' @param fitCtrl trainControl object forwarded to caret::train
  #' @return A list with the fitted model ("model") and the postResample
  #'   evaluation metrics ("post_resample")
  set.seed(641386945)
  # Timing wraps the fit; the model is assigned as a side effect.
  system.time(
    fitted_model <- run_in_parallel(train, iphonesentiment ~ ., data = dataF,
                                    method = model_name, trControl = fitCtrl)
  )
  predictions <- predict(fitted_model, testing_data)
  metrics <- postResample(predictions, testing_data$iphonesentiment)
  list("model" = fitted_model, "post_resample" = metrics)
}

plot_confusion_matrix <- function(conf_matrix, model_name) {
  #' Plot a confusion matrix as a ggplot tile chart: correct predictions in
  #' green, errors in red, tile opacity proportional to the within-reference
  #' class frequency.
  #'
  #' @param conf_matrix A caret confusionMatrix object (must have $table)
  #' @param model_name Model name used in the plot title
  #' @return A ggplot object
  # Renamed from `table` to avoid shadowing base::table.
  cm_table <- data.frame(conf_matrix$table)

  # FIX: reference the piped columns directly inside mutate() instead of
  # reaching back into the source data.frame with `$` (fragile after any
  # upstream filtering/reordering); also ungroup() after group_by().
  plotTable <- cm_table %>%
    mutate(goodbad = ifelse(Prediction == Reference, "good", "bad")) %>%
    group_by(Reference) %>%
    mutate(prop = Freq / sum(Freq)) %>%
    ungroup()

  # fill alpha relative to sensitivity/specificity by proportional outcomes within reference groups (see dplyr code above as well as original confusion matrix for comparison)
  ggplot(data = plotTable,
         mapping = aes(x = Reference, y = Prediction,
                       fill = goodbad, alpha = prop)) +
    geom_tile() +
    geom_text(aes(label = Freq), vjust = .5, fontface = "bold", alpha = 1) +
    scale_fill_manual(values = c(good = "green", bad = "red")) +
    theme_bw() +
    xlim(rev(levels(cm_table$Reference))) +
    ggtitle(paste(model_name, "Confusion Matrix"))
}

iPhone analysis

Load training datasets for iPhone labeled sentiment.

Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.

Explore structure and descriptive statistics from the training datasets

Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame':    12973 obs. of  59 variables:
 $ iphone         : num  1 1 1 1 1 41 1 1 1 1 ...
 $ samsunggalaxy  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyxperia     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokialumina    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcphone       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ ios            : num  0 0 0 0 0 6 0 0 0 0 ...
 $ googleandroid  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecampos   : num  0 0 0 0 0 1 1 0 0 0 ...
 $ samsungcampos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonycampos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacampos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccampos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamneg   : num  0 0 0 0 0 3 1 0 0 0 ...
 $ samsungcamneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonycamneg     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacamneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccamneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamunc   : num  0 0 0 0 0 7 1 0 0 0 ...
 $ samsungcamunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonycamunc     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacamunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccamunc      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedispos   : num  0 0 0 0 0 1 13 0 0 0 ...
 $ samsungdispos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydispos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadispos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdispos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisneg   : num  0 0 0 0 0 3 10 0 0 0 ...
 $ samsungdisneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydisneg     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadisneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisunc   : num  0 0 0 0 0 4 9 0 0 0 ...
 $ samsungdisunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydisunc     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadisunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisunc      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperpos   : num  0 1 0 1 1 0 5 3 0 0 ...
 $ samsungperpos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperpos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperpos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperpos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperneg   : num  0 0 0 0 0 0 4 1 0 0 ...
 $ samsungperneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperneg     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperunc   : num  0 0 0 1 0 0 5 0 0 0 ...
 $ samsungperunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperunc     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperunc      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperpos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperpos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperunc      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonesentiment: num  0 0 0 0 0 4 4 0 0 0 ...
 - attr(*, "spec")=
  .. cols(
  ..   iphone = col_double(),
  ..   samsunggalaxy = col_double(),
  ..   sonyxperia = col_double(),
  ..   nokialumina = col_double(),
  ..   htcphone = col_double(),
  ..   ios = col_double(),
  ..   googleandroid = col_double(),
  ..   iphonecampos = col_double(),
  ..   samsungcampos = col_double(),
  ..   sonycampos = col_double(),
  ..   nokiacampos = col_double(),
  ..   htccampos = col_double(),
  ..   iphonecamneg = col_double(),
  ..   samsungcamneg = col_double(),
  ..   sonycamneg = col_double(),
  ..   nokiacamneg = col_double(),
  ..   htccamneg = col_double(),
  ..   iphonecamunc = col_double(),
  ..   samsungcamunc = col_double(),
  ..   sonycamunc = col_double(),
  ..   nokiacamunc = col_double(),
  ..   htccamunc = col_double(),
  ..   iphonedispos = col_double(),
  ..   samsungdispos = col_double(),
  ..   sonydispos = col_double(),
  ..   nokiadispos = col_double(),
  ..   htcdispos = col_double(),
  ..   iphonedisneg = col_double(),
  ..   samsungdisneg = col_double(),
  ..   sonydisneg = col_double(),
  ..   nokiadisneg = col_double(),
  ..   htcdisneg = col_double(),
  ..   iphonedisunc = col_double(),
  ..   samsungdisunc = col_double(),
  ..   sonydisunc = col_double(),
  ..   nokiadisunc = col_double(),
  ..   htcdisunc = col_double(),
  ..   iphoneperpos = col_double(),
  ..   samsungperpos = col_double(),
  ..   sonyperpos = col_double(),
  ..   nokiaperpos = col_double(),
  ..   htcperpos = col_double(),
  ..   iphoneperneg = col_double(),
  ..   samsungperneg = col_double(),
  ..   sonyperneg = col_double(),
  ..   nokiaperneg = col_double(),
  ..   htcperneg = col_double(),
  ..   iphoneperunc = col_double(),
  ..   samsungperunc = col_double(),
  ..   sonyperunc = col_double(),
  ..   nokiaperunc = col_double(),
  ..   htcperunc = col_double(),
  ..   iosperpos = col_double(),
  ..   googleperpos = col_double(),
  ..   iosperneg = col_double(),
  ..   googleperneg = col_double(),
  ..   iosperunc = col_double(),
  ..   googleperunc = col_double(),
  ..   iphonesentiment = col_double()
  .. )
[1] "Number of NA values:  0"

Labeled sentiment distribution.

plot_ly(iphoneDF, x= ~iphoneDF$iphonesentiment, type='histogram')

Feature selection methods

Features Correlation

Explore correlation between all variables:

[1] "Number of original features:  59"
[1] "Number of features after cleanup:  46"
                      iphone samsunggalaxy   sonyxperia   nokialumina googleandroid iphonecampos samsungcampos   sonycampos   nokiacampos
iphone           1.000000000  0.0197858228 -0.011617908 -0.0134231660  0.1075300211  0.078157326   0.057395445 -0.004593694 -0.0084394692
samsunggalaxy    0.019785823  1.0000000000  0.366670822 -0.0060880044  0.2361624448  0.030556160   0.252120595  0.145969111 -0.0004004217
sonyxperia      -0.011617908  0.3666708219  1.000000000 -0.0063498609 -0.0182884699  0.005067537   0.050139994  0.396750611 -0.0042317350
nokialumina     -0.013423166 -0.0060880044 -0.006349861  1.0000000000 -0.0011148600  0.029824073   0.009298654 -0.002754318  0.7004150055
googleandroid    0.107530021  0.2361624448 -0.018288470 -0.0011148600  1.0000000000  0.104419788   0.315487421 -0.000205677  0.0032844953
iphonecampos     0.078157326  0.0305561602  0.005067537  0.0298240731  0.1044197879  1.000000000   0.062438375  0.045008555  0.0308169399
samsungcampos    0.057395445  0.2521205947  0.050139994  0.0092986543  0.3154874208  0.062438375   1.000000000  0.145429114  0.0148603600
sonycampos      -0.004593694  0.1459691107  0.396750611 -0.0027543180 -0.0002056770  0.045008555   0.145429114  1.000000000 -0.0018355589
nokiacampos     -0.008439469 -0.0004004217 -0.004231735  0.7004150055  0.0032844953  0.030816940   0.014860360 -0.001835559  1.0000000000
htccampos        0.022717373  0.0652742531  0.016506652  0.0212952463  0.1480948517  0.623911948   0.090099060  0.058852134  0.0177614870
iphonecamneg     0.490523588  0.1260628010 -0.006715050  0.0632452640  0.3918021616  0.541339970   0.206019550  0.013254078  0.0533714992
samsungcamneg    0.142552547  0.3429191699 -0.004308481  0.0095460787  0.7114026076  0.117450549   0.608840260  0.032897238  0.0160468719
sonycamneg      -0.001830062  0.0318207535  0.345295791 -0.0012288604  0.0135394518  0.019994295   0.053984838  0.408990574 -0.0008189489
htccamneg        0.104612765  0.2227765630 -0.012284395  0.0372556408  0.5627028612  0.206584693   0.295428414  0.013568090  0.0305012877
iphonecamunc     0.750403174 -0.0101550995 -0.007638359  0.0162370821  0.0429551472  0.473266316   0.028875432  0.016442392  0.0186364023
samsungcamunc    0.073451360  0.3161343514  0.058776666  0.0409222547  0.3914328531  0.076943199   0.814799293  0.164043120  0.0580378785
sonycamunc      -0.003064444  0.1041234759  0.376632870 -0.0019143276 -0.0065777411  0.029397327   0.098836218  0.528452314 -0.0012757645
htccamunc        0.026137801  0.0729644969  0.014249220  0.0361240923  0.1661818489  0.321523039   0.104495332  0.056574443  0.0285796837
iphonedispos     0.052624621 -0.0065259392 -0.018121019  0.0283164282  0.0669526940  0.272586547   0.039427134  0.019616805  0.0292070224
sonydispos      -0.003826569  0.0613598274  0.252589216 -0.0015281417 -0.0016689972  0.017749083   0.058122227  0.404993025 -0.0010183988
nokiadispos     -0.008202154  0.0102477757 -0.003772222  0.6502528013 -0.0041735324  0.026317276   0.038370716 -0.001636240  0.8708585691
htcdispos        0.007124916  0.0248386343  0.003299105  0.0105537205  0.0575517863  0.067428991   0.032923476  0.016457151  0.0088485247
iphonedisneg     0.175572621  0.0178241295 -0.013589598  0.0237416752  0.1218206151  0.148650674   0.065278833  0.006716763  0.0221700030
sonydisneg      -0.002777449  0.0067858231  0.163285424 -0.0006439773  0.0002154346  0.002884403   0.011236169  0.131891912 -0.0004291655
htcdisneg        0.085273338  0.1888214611 -0.002137681  0.0442224554  0.4470127937  0.110101690   0.238425394  0.037623834  0.0358414565
iphonedisunc     0.250929821 -0.0278792236 -0.017980754  0.0026813370  0.0177913687  0.188310035   0.012312670  0.007383581  0.0050123671
sonydisunc      -0.004552958  0.0605561472  0.295428494 -0.0013832449 -0.0047529101  0.019403175   0.067667721  0.388803630 -0.0009218353
nokiadisunc     -0.007588080  0.0146608209 -0.003233038  0.4913317753 -0.0035769868  0.009608304   0.046811518 -0.001402364  0.7956709983
htcdisunc        0.024321607  0.0717456027  0.010002504  0.0211143866  0.1470680904  0.156063063   0.086766462  0.055055469  0.0163417006
iphoneperpos    -0.009507666 -0.0031694293 -0.028717034  0.0333454788  0.1060606249  0.348332416   0.056272259  0.009152257  0.0340558067
samsungperpos    0.051538330  0.2428663875  0.020914013  0.0174590583  0.2703549368  0.045221267   0.793898531  0.046922592  0.0257099089
sonyperpos      -0.006326680  0.0674892485  0.266141716 -0.0019186794  0.0008356533  0.013944374   0.047395270  0.387310836 -0.0012786647
nokiaperpos     -0.010508929  0.0018461920 -0.004606023  0.7374571504 -0.0022999416  0.021177553   0.021581165 -0.001997910  0.8874498813
htcperpos        0.030621280  0.0882886778  0.004676818  0.0391126464  0.2094141916  0.287084655   0.115131849  0.021325928  0.0263959730
iphoneperneg     0.013863107  0.0459625861 -0.028774205  0.0337354136  0.2125251416  0.151918629   0.112507527  0.006280325  0.0307357306
samsungperneg    0.115130047  0.3035599053 -0.001931200  0.0173540315  0.5580899386  0.092030029   0.546669519  0.034148912  0.0262075456
sonyperneg      -0.003624744  0.0099767986  0.122406709 -0.0009478623  0.0056565738  0.007034030   0.019365699  0.182829435 -0.0006316834
htcperneg        0.075975448  0.1784097437 -0.012082758  0.0500508027  0.4334113667  0.109391575   0.231171524  0.009013357  0.0337746618
iphoneperunc    -0.016037424 -0.0173890375 -0.028220476  0.0201971628  0.0566763832  0.187259620   0.031844675  0.008176179  0.0215513806
samsungperunc    0.046821536  0.1847750567  0.008007867  0.0352741223  0.2217259668  0.040154370   0.487766617  0.053436363  0.0492523325
sonyperunc      -0.003045401  0.0374818698  0.151675102 -0.0012035022 -0.0041353037  0.019987020   0.057860036  0.378811803 -0.0008020494
htcperunc        0.011414261  0.0449278125 -0.004888049  0.0237567173  0.1096851420  0.067283071   0.061303831  0.015781446  0.0172606280
iosperpos       -0.020059363 -0.0058016799 -0.011009239  0.0307188669 -0.0167016726 -0.003991076   0.102471444 -0.003118246  0.1031232916
googleperpos     0.118008345  0.2460460932 -0.008467307  0.0065145374  0.6385807446  0.117901901   0.298281304  0.006672666  0.0115643182
googleperunc     0.067859235  0.1422517633 -0.007916030  0.0079987614  0.3719984702  0.073003914   0.159171496 -0.003433660  0.0125178481
iphonesentiment  0.014858654 -0.3591727604 -0.233169880 -0.0559617692 -0.1891420499 -0.029731217  -0.112743311 -0.090665090 -0.0333745609
                   htccampos iphonecamneg samsungcamneg    sonycamneg    htccamneg iphonecamunc samsungcamunc    sonycamunc    htccamunc iphonedispos
iphone           0.022717373  0.490523588   0.142552547 -0.0018300621  0.104612765  0.750403174    0.07345136 -0.0030644436  0.026137801  0.052624621
samsunggalaxy    0.065274253  0.126062801   0.342919170  0.0318207535  0.222776563 -0.010155099    0.31613435  0.1041234759  0.072964497 -0.006525939
sonyxperia       0.016506652 -0.006715050  -0.004308481  0.3452957914 -0.012284395 -0.007638359    0.05877667  0.3766328700  0.014249220 -0.018121019
nokialumina      0.021295246  0.063245264   0.009546079 -0.0012288604  0.037255641  0.016237082    0.04092225 -0.0019143276  0.036124092  0.028316428
googleandroid    0.148094852  0.391802162   0.711402608  0.0135394518  0.562702861  0.042955147    0.39143285 -0.0065777411  0.166181849  0.066952694
iphonecampos     0.623911948  0.541339970   0.117450549  0.0199942953  0.206584693  0.473266316    0.07694320  0.0293973265  0.321523039  0.272586547
samsungcampos    0.090099060  0.206019550   0.608840260  0.0539848377  0.295428414  0.028875432    0.81479929  0.0988362178  0.104495332  0.039427134
sonycampos       0.058852134  0.013254078   0.032897238  0.4089905736  0.013568090  0.016442392    0.16404312  0.5284523138  0.056574443  0.019616805
nokiacampos      0.017761487  0.053371499   0.016046872 -0.0008189489  0.030501288  0.018636402    0.05803788 -0.0012757645  0.028579684  0.029207022
htccampos        1.000000000  0.206896901   0.171078711  0.0161964828  0.450715453  0.163480446    0.11342051  0.0373285316  0.656659442  0.129812956
iphonecamneg     0.206896901  1.000000000   0.468562960  0.0655977000  0.507638049  0.643460020    0.26266536  0.0340240958  0.238011225  0.261983142
samsungcamneg    0.171078711  0.468562960   1.000000000  0.0723697042  0.661017063  0.061619492    0.70383784  0.0507620794  0.200439201  0.079343436
sonycamneg       0.016196483  0.065597700   0.072369704  1.0000000000  0.038003476  0.025059285    0.12861974  0.6030047959  0.036701616  0.024827910
htccamneg        0.450715453  0.507638049   0.661017063  0.0380034764  1.000000000  0.112118301    0.37440485  0.0247482526  0.661111712  0.098294710
iphonecamunc     0.163480446  0.643460020   0.061619492  0.0250592852  0.112118301  1.000000000    0.05614601  0.0473068441  0.171847066  0.209007616
samsungcamunc    0.113420510  0.262665362   0.703837836  0.1286197355  0.374404849  0.056146005    1.00000000  0.2879321571  0.178753184  0.067375292
sonycamunc       0.037328532  0.034024096   0.050762079  0.6030047959  0.024748253  0.047306844    0.28793216  1.0000000000  0.108464499  0.025677953
htccamunc        0.656659442  0.238011225   0.200439201  0.0367016156  0.661111712  0.171847066    0.17875318  0.1084644987  1.000000000  0.093889706
iphonedispos     0.129812956  0.261983142   0.079343436  0.0248279099  0.098294710  0.209007616    0.06737529  0.0256779528  0.093889706  1.000000000
sonydispos       0.022015622  0.015890753   0.037424924  0.3372117408  0.020739500  0.012209066    0.10970557  0.4434927247  0.037799552  0.023609805
nokiadispos      0.014629187  0.050013170   0.042391084 -0.0007300214  0.025720853  0.015624798    0.14095720 -0.0011372326  0.024518076  0.034499138
htcdispos        0.140446973  0.055439580   0.065595998  0.0151478630  0.135171608  0.023328496    0.05295388  0.0187454585  0.144467590  0.042102991
iphonedisneg     0.041086089  0.346878956   0.152069590  0.0400385215  0.151600245  0.253253711    0.09882361  0.0241162643  0.067470212  0.868765387
sonydisneg       0.003802480  0.009587943   0.014879656  0.1773804280  0.009023997  0.003641380    0.03090804  0.2388456015  0.009773739  0.004957519
htcdisneg        0.270474023  0.374707296   0.521369825  0.0827231553  0.728948508  0.079895368    0.34829348  0.0626007989  0.489481843  0.163987219
iphonedisunc     0.052906939  0.299074293   0.028896976  0.0271242781  0.044419886  0.361321734    0.03967813  0.0306197606  0.058927289  0.883026229
sonydisunc       0.025058938  0.036018255   0.063338287  0.5780401304  0.037615168  0.030042395    0.19905389  0.7174089400  0.074237080  0.027297522
nokiadisunc      0.006690145  0.018880092   0.051899349 -0.0006256755  0.011908284  0.005721107    0.17026170 -0.0009746818  0.011894442  0.009563336
htcdisunc        0.386465078  0.160153769   0.163322433  0.0752912506  0.474559542  0.085983009    0.18840329  0.1073824307  0.605687511  0.171822145
iphoneperpos     0.242604866  0.257568960   0.130965178  0.0265814721  0.160387571  0.190248578    0.09422706  0.0187935025  0.157350739  0.659353827
samsungperpos    0.067849952  0.172260776   0.590556431  0.0591163229  0.246822075  0.027429286    0.76866705  0.0580179791  0.097082536  0.092702667
sonyperpos       0.013336985  0.017790709   0.037777680  0.5019700016  0.012290232  0.009325653    0.09628229  0.3945468871  0.026428434  0.011714804
nokiaperpos      0.012832239  0.039337638   0.023510164 -0.0008913832  0.022928192  0.011472100    0.08237079 -0.0013886032  0.024826942  0.024834377
htcperpos        0.586175384  0.205400545   0.242173307  0.0227282674  0.550676429  0.092072136    0.17876940  0.0291620375  0.652254312  0.125257982
iphoneperneg     0.074396779  0.308875213   0.259390474  0.0446710591  0.246452247  0.113175498    0.16484231  0.0148161988  0.108741499  0.637768430
samsungperneg    0.133387536  0.361394034   0.825420862  0.0724224540  0.509565524  0.055070726    0.68441917  0.0507382344  0.174304894  0.142073015
sonyperneg       0.003725933  0.020559519   0.025463286  0.4654221318  0.008808122  0.004866392    0.04595525  0.2071803257  0.009450643  0.004743860
htcperneg        0.290333302  0.348535462   0.511629134  0.0281019160  0.756413279  0.070524621    0.32665886  0.0186629297  0.559357663  0.123985116
iphoneperunc     0.059577005  0.217579385   0.069828121  0.0266791114  0.099731103  0.174433158    0.07492203  0.0273592822  0.113207080  0.665237523
samsungperunc    0.058155116  0.138709352   0.389305581  0.0847821713  0.198497720  0.033915160    0.60256883  0.0860877971  0.108102559  0.157446323
sonyperunc       0.018080529  0.032569554   0.060836735  0.6040116918  0.029574377  0.025255729    0.15254243  0.5673580082  0.050625072  0.027680646
htcperunc        0.253677808  0.114715736   0.122047959  0.0262898504  0.425360579  0.057397481    0.12451633  0.0319632466  0.601513451  0.091895380
iosperpos       -0.006121324 -0.012228970   0.110072741 -0.0012764663 -0.010934307 -0.004920454    0.12901202 -0.0008902753 -0.007866478  0.020232063
googleperpos     0.163144592  0.417185277   0.658643639  0.0209042210  0.578324761  0.076915508    0.41737461 -0.0038250565  0.223305123  0.165575625
googleperunc     0.100031085  0.241002513   0.342119638 -0.0015319542  0.333727389  0.058138669    0.26943151 -0.0023864893  0.162430756  0.179686303
iphonesentiment -0.120434115 -0.083963139  -0.185988857 -0.0248264033 -0.222972178  0.001443485   -0.13804591 -0.0503268537 -0.148881468  0.014546824
                   sonydispos   nokiadispos    htcdispos iphonedisneg    sonydisneg     htcdisneg iphonedisunc    sonydisunc   nokiadisunc
iphone          -0.0038265686 -0.0082021539  0.007124916  0.175572621 -0.0027774491  0.0852733380  0.250929821 -0.0045529578 -0.0075880802
samsunggalaxy    0.0613598274  0.0102477757  0.024838634  0.017824130  0.0067858231  0.1888214611 -0.027879224  0.0605561472  0.0146608209
sonyxperia       0.2525892158 -0.0037722221  0.003299105 -0.013589598  0.1632854243 -0.0021376810 -0.017980754  0.2954284939 -0.0032330379
nokialumina     -0.0015281417  0.6502528013  0.010553720  0.023741675 -0.0006439773  0.0442224554  0.002681337 -0.0013832449  0.4913317753
googleandroid   -0.0016689972 -0.0041735324  0.057551786  0.121820615  0.0002154346  0.4470127937  0.017791369 -0.0047529101 -0.0035769868
iphonecampos     0.0177490834  0.0263172760  0.067428991  0.148650674  0.0028844028  0.1101016903  0.188310035  0.0194031753  0.0096083040
samsungcampos    0.0581222272  0.0383707155  0.032923476  0.065278833  0.0112361693  0.2384253937  0.012312670  0.0676677209  0.0468115176
sonycampos       0.4049930253 -0.0016362405  0.016457151  0.006716763  0.1318919118  0.0376238338  0.007383581  0.3888036304 -0.0014023637
nokiacampos     -0.0010183988  0.8708585691  0.008848525  0.022170003 -0.0004291655  0.0358414565  0.005012367 -0.0009218353  0.7956709983
htccampos        0.0220156223  0.0146291869  0.140446973  0.041086089  0.0038024800  0.2704740233  0.052906939  0.0250589384  0.0066901449
iphonecamneg     0.0158907533  0.0500131699  0.055439580  0.346878956  0.0095879433  0.3747072961  0.299074293  0.0360182554  0.0188800916
samsungcamneg    0.0374249243  0.0423910836  0.065595998  0.152069590  0.0148796562  0.5213698250  0.028896976  0.0633382871  0.0518993494
sonycamneg       0.3372117408 -0.0007300214  0.015147863  0.040038521  0.1773804280  0.0827231553  0.027124278  0.5780401304 -0.0006256755
htccamneg        0.0207394998  0.0257208533  0.135171608  0.151600245  0.0090239969  0.7289485078  0.044419886  0.0376151684  0.0119082844
iphonecamunc     0.0122090661  0.0156247976  0.023328496  0.253253711  0.0036413799  0.0798953676  0.361321734  0.0300423951  0.0057211073
samsungcamunc    0.1097055700  0.1409571952  0.052953879  0.098823606  0.0309080398  0.3482934774  0.039678126  0.1990538876  0.1702617047
sonycamunc       0.4434927247 -0.0011372326  0.018745458  0.024116264  0.2388456015  0.0626007989  0.030619761  0.7174089400 -0.0009746818
htccamunc        0.0377995521  0.0245180762  0.144467590  0.067470212  0.0097737393  0.4894818428  0.058927289  0.0742370804  0.0118944423
iphonedispos     0.0236098053  0.0344991383  0.042102991  0.868765387  0.0049575187  0.1639872192  0.883026229  0.0272975223  0.0095633363
sonydispos       1.0000000000 -0.0009078136  0.015831997  0.015453255  0.8972867887  0.0530167286  0.012680171  0.7730887035 -0.0007780549
nokiadispos     -0.0009078136  1.0000000000  0.007594766  0.026055893 -0.0003825636  0.0313410172  0.005563423 -0.0008217356  0.8846581356
htcdispos        0.0158319967  0.0075947659  1.000000000  0.034742979  0.0041258099  0.1804130745  0.030929188  0.0196386582  0.0034865140
iphonedisneg     0.0154532548  0.0260558932  0.034742979  1.000000000  0.0086647820  0.2227633513  0.879950578  0.0304502061  0.0072609590
sonydisneg       0.8972867887 -0.0003825636  0.004125810  0.008664782  1.0000000000  0.0251869341  0.004807408  0.6588319145 -0.0003278817
htcdisneg        0.0530167286  0.0313410172  0.180413075  0.222763351  0.0251869341  1.0000000000  0.136125810  0.0867272010  0.0149516316
iphonedisunc     0.0126801713  0.0055634233  0.030929188  0.879950578  0.0048074081  0.1361258096  1.000000000  0.0376336503  0.0011105463
sonydisunc       0.7730887035 -0.0008217356  0.019638658  0.030450206  0.6588319145  0.0867272010  0.037633650  1.0000000000 -0.0007042805
nokiadisunc     -0.0007780549  0.8846581356  0.003486514  0.007260959 -0.0003278817  0.0149516316  0.001110546 -0.0007042805  1.0000000000
htcdisunc        0.0574162246  0.0140462214  0.201852112  0.158725895  0.0200940302  0.7846720113  0.170131836  0.1276401084  0.0066257242
iphoneperpos     0.0101954743  0.0374559878  0.053774653  0.530888336  0.0014505894  0.2041175256  0.554364879  0.0144778088  0.0104387347
samsungperpos    0.0393349408  0.0637346396  0.064721089  0.116563641  0.0121258783  0.3706617924  0.079438177  0.0546811048  0.0772478297
sonyperpos       0.5024874205 -0.0011398179  0.008968510  0.008028103  0.3702542519  0.0339514148  0.005151596  0.4319240195 -0.0009768975
nokiaperpos     -0.0011084740  0.8594816669  0.007050365  0.018659378 -0.0004671243  0.0299734535  0.002340012 -0.0010033697  0.8155976672
htcperpos        0.0198549806  0.0239161992  0.170979971  0.100581679  0.0055861443  0.5757290234  0.084321539  0.0300892147  0.0129402565
iphoneperneg     0.0057365246  0.0313060407  0.046873496  0.640995104  0.0027343448  0.3033383674  0.564479458  0.0118804562  0.0105173290
samsungperneg    0.0369525459  0.0658243628  0.099958122  0.195391018  0.0142097172  0.6212857650  0.111568445  0.0570781706  0.0799512449
sonyperneg       0.1683775861 -0.0005630906  0.004142549  0.007229347  0.1075178973  0.0232962762  0.002926026  0.1845288613 -0.0004826050
htcperneg        0.0165139802  0.0305980536  0.155595016  0.173659920  0.0071159432  0.8428682456  0.095449680  0.0282706573  0.0166256552
iphoneperunc     0.0137922154  0.0182744228  0.045358575  0.570044418  0.0035375194  0.2236255729  0.623929443  0.0246677278  0.0079738761
samsungperunc    0.0542203186  0.1185634253  0.105644354  0.170623380  0.0193882137  0.5336743572  0.156926656  0.0918810680  0.1429974781
sonyperunc       0.3407659807 -0.0007149570  0.015451216  0.023878172  0.1126326681  0.0682515586  0.022878057  0.4765973052 -0.0006127643
htcperunc        0.0194072718  0.0144895062  0.135495011  0.096513956  0.0066827986  0.5497638190  0.092742570  0.0370091903  0.0073260463
iosperpos        0.0253918473  0.0795411340 -0.001147265  0.015557280  0.0302728027  0.0002582922  0.024055145  0.0145998730  0.1020369449
googleperpos     0.0001583647 -0.0024269726  0.118339703  0.218540632  0.0005795463  0.7041173806  0.132862181 -0.0027638895 -0.0020800722
googleperunc    -0.0019050522 -0.0015142114  0.124018219  0.204416264 -0.0008028119  0.6431743675  0.172276045 -0.0017244171 -0.0012977769
iphonesentiment -0.0386353028 -0.0259223780 -0.060405793  0.003144905 -0.0199561100 -0.1927272667  0.027172723 -0.0321371543 -0.0239719884
                   htcdisunc iphoneperpos samsungperpos    sonyperpos   nokiaperpos    htcperpos iphoneperneg samsungperneg    sonyperneg
iphone           0.024321607 -0.009507666    0.05153833 -0.0063266804 -0.0105089285  0.030621280  0.013863107    0.11513005 -3.624744e-03
samsunggalaxy    0.071745603 -0.003169429    0.24286639  0.0674892485  0.0018461920  0.088288678  0.045962586    0.30355991  9.976799e-03
sonyxperia       0.010002504 -0.028717034    0.02091401  0.2661417156 -0.0046060231  0.004676818 -0.028774205   -0.00193120  1.224067e-01
nokialumina      0.021114387  0.033345479    0.01745906 -0.0019186794  0.7374571504  0.039112646  0.033735414    0.01735403 -9.478623e-04
googleandroid    0.147068090  0.106060625    0.27035494  0.0008356533 -0.0022999416  0.209414192  0.212525142    0.55808994  5.656574e-03
iphonecampos     0.156063063  0.348332416    0.04522127  0.0139443738  0.0211775527  0.287084655  0.151918629    0.09203003  7.034030e-03
samsungcampos    0.086766462  0.056272259    0.79389853  0.0473952698  0.0215811648  0.115131849  0.112507527    0.54666952  1.936570e-02
sonycampos       0.055055469  0.009152257    0.04692259  0.3873108356 -0.0019979103  0.021325928  0.006280325    0.03414891  1.828294e-01
nokiacampos      0.016341701  0.034055807    0.02570991 -0.0012786647  0.8874498813  0.026395973  0.030735731    0.02620755 -6.316834e-04
htccampos        0.386465078  0.242604866    0.06784995  0.0133369850  0.0128322395  0.586175384  0.074396779    0.13338754  3.725933e-03
iphonecamneg     0.160153769  0.257568960    0.17226078  0.0177907090  0.0393376378  0.205400545  0.308875213    0.36139403  2.055952e-02
samsungcamneg    0.163322433  0.130965178    0.59055643  0.0377776797  0.0235101641  0.242173307  0.259390474    0.82542086  2.546329e-02
sonycamneg       0.075291251  0.026581472    0.05911632  0.5019700016 -0.0008913832  0.022728267  0.044671059    0.07242245  4.654221e-01
htccamneg        0.474559542  0.160387571    0.24682207  0.0122902322  0.0229281922  0.550676429  0.246452247    0.50956552  8.808122e-03
iphonecamunc     0.085983009  0.190248578    0.02742929  0.0093256533  0.0114721001  0.092072136  0.113175498    0.05507073  4.866392e-03
samsungcamunc    0.188403288  0.094227056    0.76866705  0.0962822917  0.0823707914  0.178769401  0.164842312    0.68441917  4.595525e-02
sonycamunc       0.107382431  0.018793503    0.05801798  0.3945468871 -0.0013886032  0.029162038  0.014816199    0.05073823  2.071803e-01
htccamunc        0.605687511  0.157350739    0.09708254  0.0264284339  0.0248269423  0.652254312  0.108741499    0.17430489  9.450643e-03
iphonedispos     0.171822145  0.659353827    0.09270267  0.0117148037  0.0248343768  0.125257982  0.637768430    0.14207302  4.743860e-03
sonydispos       0.057416225  0.010195474    0.03933494  0.5024874205 -0.0011084740  0.019854981  0.005736525    0.03695255  1.683776e-01
nokiadispos      0.014046221  0.037455988    0.06373464 -0.0011398179  0.8594816669  0.023916199  0.031306041    0.06582436 -5.630906e-04
htcdispos        0.201852112  0.053774653    0.06472109  0.0089685103  0.0070503649  0.170979971  0.046873496    0.09995812  4.142549e-03
iphonedisneg     0.158725895  0.530888336    0.11656364  0.0080281030  0.0186593778  0.100581679  0.640995104    0.19539102  7.229347e-03
sonydisneg       0.020094030  0.001450589    0.01212588  0.3702542519 -0.0004671243  0.005586144  0.002734345    0.01420972  1.075179e-01
htcdisneg        0.784672011  0.204117526    0.37066179  0.0339514148  0.0299734535  0.575729023  0.303338367    0.62128576  2.329628e-02
iphonedisunc     0.170131836  0.554364879    0.07943818  0.0051515957  0.0023400120  0.084321539  0.564479458    0.11156844  2.926026e-03
sonydisunc       0.127640108  0.014477809    0.05468110  0.4319240195 -0.0010033697  0.030089215  0.011880456    0.05707817  1.845289e-01
nokiadisunc      0.006625724  0.010438735    0.07724783 -0.0009768975  0.8155976672  0.012940257  0.010517329    0.07995124 -4.826050e-04
htcdisunc        1.000000000  0.203943525    0.28871404  0.0395562543  0.0150968302  0.686133766  0.201737692    0.40568853  2.009210e-02
iphoneperpos     0.203943525  1.000000000    0.14078523  0.0303379013  0.0287020602  0.207538166  0.794832452    0.21148491  1.847884e-02
samsungperpos    0.288714036  0.140785232    1.00000000  0.0679568359  0.0367672042  0.200799111  0.188500749    0.80201441  2.447628e-02
sonyperpos       0.039556254  0.030337901    0.06795684  1.0000000000 -0.0013917599  0.033139841  0.028906711    0.04473209  7.985169e-01
nokiaperpos      0.015096830  0.028702060    0.03676720 -0.0013917599  1.0000000000  0.034628681  0.023873403    0.03766309 -6.875545e-04
htcperpos        0.686133766  0.207538166    0.20079911  0.0331398411  0.0346286812  1.000000000  0.160466806    0.31688941  5.949372e-03
iphoneperneg     0.201737692  0.794832452    0.18850075  0.0289067106  0.0238734028  0.160466806  1.000000000    0.31633887  4.785234e-02
samsungperneg    0.405688534  0.211484913    0.80201441  0.0447320903  0.0376630853  0.316889412  0.316338873    1.00000000  3.156948e-02
sonyperneg       0.020092101  0.018478843    0.02447628  0.7985168799 -0.0006875545  0.005949372  0.047852337    0.03156948  1.000000e+00
htcperneg        0.677325871  0.182686004    0.32334289  0.0101492440  0.0445128708  0.715452925  0.292967979    0.56115875  8.005413e-03
iphoneperunc     0.246904143  0.791827630    0.14033769  0.0197639928  0.0143537817  0.141842551  0.759483720    0.19815955  1.557893e-02
samsungperunc    0.538856907  0.198968038    0.77721746  0.0536779030  0.0696745358  0.299447474  0.246448012    0.77616645  3.049156e-02
sonyperunc       0.083274559  0.036379583    0.05789643  0.7358017934 -0.0008729890  0.026075838  0.042156459    0.06080914  6.680181e-01
htcperunc        0.721406551  0.123390017    0.18956496  0.0140733766  0.0163730197  0.849738746  0.140183947    0.27124451  6.294713e-03
iosperpos        0.006433213  0.210342600    0.27420920  0.0057578219  0.0849480995 -0.002802508  0.247457091    0.20289225  3.962787e-05
googleperpos     0.483030041  0.240266652    0.44430247  0.0077119343  0.0020510756  0.380277622  0.345246653    0.75841110  1.053913e-02
googleperunc     0.593493504  0.237625431    0.42754226 -0.0023919145  0.0031408747  0.368326651  0.296226849    0.64122869 -1.181649e-03
iphonesentiment -0.132952797  0.029637900   -0.08106319 -0.0389127438 -0.0415946125 -0.178427038 -0.004804058   -0.13865698 -3.085009e-02
                   htcperneg iphoneperunc samsungperunc    sonyperunc     htcperunc     iosperpos  googleperpos  googleperunc iphonesentiment
iphone           0.075975448 -0.016037424   0.046821536 -0.0030454006  0.0114142610 -2.005936e-02  0.1180083451  0.0678592347     0.014858654
samsunggalaxy    0.178409744 -0.017389038   0.184775057  0.0374818698  0.0449278125 -5.801680e-03  0.2460460932  0.1422517633    -0.359172760
sonyxperia      -0.012082758 -0.028220476   0.008007867  0.1516751025 -0.0048880488 -1.100924e-02 -0.0084673071 -0.0079160304    -0.233169880
nokialumina      0.050050803  0.020197163   0.035274122 -0.0012035022  0.0237567173  3.071887e-02  0.0065145374  0.0079987614    -0.055961769
googleandroid    0.433411367  0.056676383   0.221725967 -0.0041353037  0.1096851420 -1.670167e-02  0.6385807446  0.3719984702    -0.189142050
iphonecampos     0.109391575  0.187259620   0.040154370  0.0199870203  0.0672830706 -3.991076e-03  0.1179019014  0.0730039141    -0.029731217
samsungcampos    0.231171524  0.031844675   0.487766617  0.0578600356  0.0613038312  1.024714e-01  0.2982813037  0.1591714963    -0.112743311
sonycampos       0.009013357  0.008176179   0.053436363  0.3788118033  0.0157814464 -3.118246e-03  0.0066726659 -0.0034336603    -0.090665090
nokiacampos      0.033774662  0.021551381   0.049252332 -0.0008020494  0.0172606280  1.031233e-01  0.0115643182  0.0125178481    -0.033374561
htccampos        0.290333302  0.059577005   0.058155116  0.0180805293  0.2536778076 -6.121324e-03  0.1631445917  0.1000310846    -0.120434115
iphonecamneg     0.348535462  0.217579385   0.138709352  0.0325695539  0.1147157357 -1.222897e-02  0.4171852771  0.2410025125    -0.083963139
samsungcamneg    0.511629134  0.069828121   0.389305581  0.0608367349  0.1220479586  1.100727e-01  0.6586436387  0.3421196381    -0.185988857
sonycamneg       0.028101916  0.026679111   0.084782171  0.6040116918  0.0262898504 -1.276466e-03  0.0209042210 -0.0015319542    -0.024826403
htccamneg        0.756413279  0.099731103   0.198497720  0.0295743766  0.4253605788 -1.093431e-02  0.5783247613  0.3337273887    -0.222972178
iphonecamunc     0.070524621  0.174433158   0.033915160  0.0252557290  0.0573974810 -4.920454e-03  0.0769155084  0.0581386691     0.001443485
samsungcamunc    0.326658858  0.074922026   0.602568833  0.1525424290  0.1245163314  1.290120e-01  0.4173746107  0.2694315123    -0.138045912
sonycamunc       0.018662930  0.027359282   0.086087797  0.5673580082  0.0319632466 -8.902753e-04 -0.0038250565 -0.0023864893    -0.050326854
htccamunc        0.559357663  0.113207080   0.108102559  0.0506250723  0.6015134514 -7.866478e-03  0.2233051228  0.1624307557    -0.148881468
iphonedispos     0.123985116  0.665237523   0.157446323  0.0276806457  0.0918953795  2.023206e-02  0.1655756249  0.1796863033     0.014546824
sonydispos       0.016513980  0.013792215   0.054220319  0.3407659807  0.0194072718  2.539185e-02  0.0001583647 -0.0019050522    -0.038635303
nokiadispos      0.030598054  0.018274423   0.118563425 -0.0007149570  0.0144895062  7.954113e-02 -0.0024269726 -0.0015142114    -0.025922378
htcdispos        0.155595016  0.045358575   0.105644354  0.0154512162  0.1354950113 -1.147265e-03  0.1183397028  0.1240182192    -0.060405793
iphonedisneg     0.173659920  0.570044418   0.170623380  0.0238781723  0.0965139559  1.555728e-02  0.2185406324  0.2044162638     0.003144905
sonydisneg       0.007115943  0.003537519   0.019388214  0.1126326681  0.0066827986  3.027280e-02  0.0005795463 -0.0008028119    -0.019956110
htcdisneg        0.842868246  0.223625573   0.533674357  0.0682515586  0.5497638190  2.582922e-04  0.7041173806  0.6431743675    -0.192727267
iphonedisunc     0.095449680  0.623929443   0.156926656  0.0228780568  0.0927425695  2.405514e-02  0.1328621807  0.1722760448     0.027172723
sonydisunc       0.028270657  0.024667728   0.091881068  0.4765973052  0.0370091903  1.459987e-02 -0.0027638895 -0.0017244171    -0.032137154
nokiadisunc      0.016625655  0.007973876   0.142997478 -0.0006127643  0.0073260463  1.020369e-01 -0.0020800722 -0.0012977769    -0.023971988
htcdisunc        0.677325871  0.246904143   0.538856907  0.0832745588  0.7214065507  6.433213e-03  0.4830300411  0.5934935043    -0.132952797
iphoneperpos     0.182686004  0.791827630   0.198968038  0.0363795828  0.1233900172  2.103426e-01  0.2402666517  0.2376254310     0.029637900
samsungperpos    0.323342889  0.140337688   0.777217462  0.0578964261  0.1895649611  2.742092e-01  0.4443024721  0.4275422572    -0.081063185
sonyperpos       0.010149244  0.019763993   0.053677903  0.7358017934  0.0140733766  5.757822e-03  0.0077119343 -0.0023919145    -0.038912744
nokiaperpos      0.044512871  0.014353782   0.069674536 -0.0008729890  0.0163730197  8.494810e-02  0.0020510756  0.0031408747    -0.041594613
htcperpos        0.715452925  0.141842551   0.299447474  0.0260758379  0.8497387456 -2.802508e-03  0.3802776224  0.3683266512    -0.178427038
iphoneperneg     0.292967979  0.759483720   0.246448012  0.0421564591  0.1401839465  2.474571e-01  0.3452466533  0.2962268490    -0.004804058
samsungperneg    0.561158754  0.198159549   0.776166450  0.0608091432  0.2712445087  2.028922e-01  0.7584110978  0.6412286948    -0.138656977
sonyperneg       0.008005413  0.015578928   0.030491565  0.6680181250  0.0062947128  3.962787e-05  0.0105391295 -0.0011816490    -0.030850090
htcperneg        1.000000000  0.184686248   0.436321786  0.0217050252  0.6596521039 -3.589854e-03  0.6288764765  0.5399024601    -0.209196046
iphoneperunc     0.184686248  1.000000000   0.256577185  0.0506531465  0.1714356260  1.666605e-01  0.2427354166  0.2971400100     0.037199859
samsungperunc    0.436321786  0.256577185   1.000000000  0.0919282907  0.3467050878  1.028045e-01  0.6164418804  0.7398874595    -0.057919616
sonyperunc       0.021705025  0.050653146   0.091928291  1.0000000000  0.0332332811 -2.861120e-03 -0.0024047420 -0.0015003415    -0.018084032
htcperunc        0.659652104  0.171435626   0.346705088  0.0332332811  1.0000000000  9.694017e-04  0.3330219017  0.3945515791    -0.114171252
iosperpos       -0.003589854  0.166660487   0.102804453 -0.0028611203  0.0009694017  1.000000e+00 -0.0097122766 -0.0060595823    -0.015757978
googleperpos     0.628876477  0.242735417   0.616441880 -0.0024047420  0.3330219017 -9.712277e-03  1.0000000000  0.8870329991    -0.137261491
googleperunc     0.539902460  0.297140010   0.739887460 -0.0015003415  0.3945515791 -6.059582e-03  0.8870329991  1.0000000000    -0.070284159
iphonesentiment -0.209196046  0.037199859  -0.057919616 -0.0180840317 -0.1141712521 -1.575798e-02 -0.1372614910 -0.0702841593     1.000000000

Near Zero Variables

Removing near zero vars:

str(nzvMetrics)
'data.frame':   46 obs. of  4 variables:
 $ freqRatio    : num  5.04 14.13 44.17 497.88 61.25 ...
 $ percentUnique: num  0.2081 0.054 0.0385 0.0231 0.0462 ...
 $ zeroVar      : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
 $ nzv          : logi  FALSE FALSE TRUE TRUE TRUE FALSE ...
iphoneNZV <- remove_nzv(iphoneCOR)
 int [1:35] 3 4 5 7 8 9 10 11 12 13 ...
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   12973 obs. of  11 variables:
 $ iphone         : num  1 1 1 1 1 41 1 1 1 1 ...
 $ samsunggalaxy  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecampos   : num  0 0 0 0 0 1 1 0 0 0 ...
 $ iphonecamunc   : num  0 0 0 0 0 7 1 0 0 0 ...
 $ iphonedispos   : num  0 0 0 0 0 1 13 0 0 0 ...
 $ iphonedisneg   : num  0 0 0 0 0 3 10 0 0 0 ...
 $ iphonedisunc   : num  0 0 0 0 0 4 9 0 0 0 ...
 $ iphoneperpos   : num  0 1 0 1 1 0 5 3 0 0 ...
 $ iphoneperneg   : num  0 0 0 0 0 0 4 1 0 0 ...
 $ iphoneperunc   : num  0 0 0 1 0 0 5 0 0 0 ...
 $ iphonesentiment: num  0 0 0 0 0 4 4 0 0 0 ...
paste("NZV number of features after cleanup: ", ncol(iphoneNZV))
[1] "NZV number of features after cleanup:  11"

Recursive Feature Elimination (RFE)

# Get results
rfe_results

Recursive feature selection

Outer resampling method: Cross-Validated (10 fold, repeated 5 times) 

Resampling performance over subset size:

The top 5 variables (out of 23):
   iphone, googleandroid, iphonecamneg, samsunggalaxy, iphonedisunc
# Plot results
plot(rfe_results, type=c("g", "o"))

Create a new dataset with the best features found by RFE

# review outcome
str(iphoneRFE)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   12973 obs. of  24 variables:
 $ iphone         : num  1 1 1 1 1 41 1 1 1 1 ...
 $ googleandroid  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamneg   : num  0 0 0 0 0 3 1 0 0 0 ...
 $ samsunggalaxy  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisunc   : num  0 0 0 0 0 4 9 0 0 0 ...
 $ iphonedispos   : num  0 0 0 0 0 1 13 0 0 0 ...
 $ iphonecampos   : num  0 0 0 0 0 1 1 0 0 0 ...
 $ htcphone       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyxperia     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccampos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisneg   : num  0 0 0 0 0 3 10 0 0 0 ...
 $ iphoneperneg   : num  0 0 0 0 0 0 4 1 0 0 ...
 $ iphonecamunc   : num  0 0 0 0 0 7 1 0 0 0 ...
 $ iphoneperunc   : num  0 0 0 1 0 0 5 0 0 0 ...
 $ iphoneperpos   : num  0 1 0 1 1 0 5 3 0 0 ...
 $ htccamneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ ios            : num  0 0 0 0 0 6 0 0 0 0 ...
 $ sonyperpos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdispos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperpos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungperpos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonesentiment: Factor w/ 6 levels "0","1","2","3",..: 1 1 1 1 1 5 5 1 1 1 ...

Models training

Preprocess label and Data Partition

# Work on a copy of the full dataset; caret classification models require
# the label to be a factor.
df <- iphoneDF
df$iphonesentiment <- as.factor(df$iphonesentiment)
# Visualise the class distribution (skewed toward class 5 — see the
# No Information Rate of 0.5815 in the confusion matrices below).
plot_ly(df, x= ~df$iphonesentiment, type='histogram')


# 70/30 stratified train/test split. The same seed (90210) is reused for
# every dataset variant below, so all partitions pick the same rows and the
# model comparisons stay apples-to-apples.
set.seed(90210)
dataPar <- createDataPartition(df$iphonesentiment, p = .70, list = FALSE)
train_df <- df[dataPar,]
test_df <- df[-dataPar,]

#iphoneCOR: variant with highly correlated features removed
iphoneCOR$iphonesentiment <- as.factor(iphoneCOR$iphonesentiment)
set.seed(90210)
dataParCOR <- createDataPartition(iphoneCOR$iphonesentiment, p = .70, list = FALSE)
train_dfCOR <- iphoneCOR[dataParCOR,]
test_dfCOR <- iphoneCOR[-dataParCOR,]

#iphoneRFE: variant with the features selected by recursive feature elimination
iphoneRFE$iphonesentiment <- as.factor(iphoneRFE$iphonesentiment)
set.seed(90210)
dataParRFE <- createDataPartition(iphoneRFE$iphonesentiment, p = .70, list = FALSE)
train_dfRFE <- iphoneRFE[dataParRFE,]
test_dfRFE <- iphoneRFE[-dataParRFE,]

#iphoneNZV: variant with near-zero-variance features removed
iphoneNZV$iphonesentiment <- as.factor(iphoneNZV$iphonesentiment)
set.seed(90210)
dataParNZV <- createDataPartition(iphoneNZV$iphonesentiment, p = .70, list = FALSE)
train_dfNZV <- iphoneNZV[dataParNZV,]
test_dfNZV <- iphoneNZV[-dataParNZV,]

Cross Validation Fit Control

# Cross-validation control shared by all caret models below:
# 10-fold CV, repeated 2 times.
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 2)

C5.0 Model

dt_c50["model"]
$model
C5.0 

12973 samples
   58 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7737209  0.5590898
  rules  FALSE   10      0.7614249  0.5410935
  rules  FALSE   20      0.7614249  0.5410935
  rules   TRUE    1      0.7736439  0.5591387
  rules   TRUE   10      0.7617337  0.5406033
  rules   TRUE   20      0.7617337  0.5406033
  tree   FALSE    1      0.7733737  0.5593810
  tree   FALSE   10      0.7634685  0.5453704
  tree   FALSE   20      0.7634685  0.5453704
  tree    TRUE    1      0.7731810  0.5592255
  tree    TRUE   10      0.7629284  0.5440406
  tree    TRUE   20      0.7629284  0.5440406

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = rules and winnow = FALSE.
dt_c50["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7825193 0.5778812 

Train model with RFE dataset:

dt_c50_rfe["model"]
$model
C5.0 

12973 samples
   23 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7740290  0.5600546
  rules  FALSE   10      0.7623892  0.5427900
  rules  FALSE   20      0.7623892  0.5427900
  rules   TRUE    1      0.7740288  0.5597324
  rules   TRUE   10      0.7611172  0.5406425
  rules   TRUE   20      0.7611172  0.5406425
  tree   FALSE    1      0.7732968  0.5593145
  tree   FALSE   10      0.7631219  0.5455098
  tree   FALSE   20      0.7631219  0.5455098
  tree    TRUE    1      0.7729115  0.5586864
  tree    TRUE   10      0.7622742  0.5433828
  tree    TRUE   20      0.7622742  0.5433828

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = rules and winnow = FALSE.
dt_c50_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7835476 0.5804822 

Train model with NZV dataset:

dt_c50_nzv["model"]
$model
C5.0 

12973 samples
   10 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7401114  0.4866984
  rules  FALSE   10      0.7319034  0.4742414
  rules  FALSE   20      0.7319034  0.4742414
  rules   TRUE    1      0.7394180  0.4854711
  rules   TRUE   10      0.7310553  0.4730982
  rules   TRUE   20      0.7310553  0.4730982
  tree   FALSE    1      0.7391486  0.4866780
  tree   FALSE   10      0.7302069  0.4706857
  tree   FALSE   20      0.7302069  0.4706857
  tree    TRUE    1      0.7389944  0.4864320
  tree    TRUE   10      0.7300908  0.4708487
  tree    TRUE   20      0.7300908  0.4708487

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = rules and winnow = FALSE.
dt_c50_nzv["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7506427 0.5081955 

Train model with COR dataset:

dt_c50_cor["model"]
$model
C5.0 

12973 samples
   45 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7743757  0.5606897
  rules  FALSE   10      0.7621185  0.5419558
  rules  FALSE   20      0.7621185  0.5419558
  rules   TRUE    1      0.7737976  0.5598019
  rules   TRUE   10      0.7631205  0.5428532
  rules   TRUE   20      0.7631205  0.5428532
  tree   FALSE    1      0.7736819  0.5603542
  tree   FALSE   10      0.7638918  0.5463209
  tree   FALSE   20      0.7638918  0.5463209
  tree    TRUE    1      0.7729497  0.5587624
  tree    TRUE   10      0.7623890  0.5429986
  tree    TRUE   20      0.7623890  0.5429986

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = rules and winnow = FALSE.
dt_c50_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7827763 0.5785862 

Random Forest Model

print("Random Forest: Full Dataset")
[1] "Random Forest: Full Dataset"
#rf <- caret_train(df, test_df, 'rf', fitControl)
rf["model"]
$model
Random Forest 

12973 samples
   58 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7025741  0.3766211
  30    0.7747220  0.5659126
  58    0.7653169  0.5525870

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 30.
rf["post_resample"]
$post_resample
 Accuracy     Kappa 
0.8087404 0.6350611 
print("Random Forest: RFE")
[1] "Random Forest: RFE"
#rf_rfe <- caret_train(iphoneRFE, test_dfRFE, 'rf', fitControl)
rf_rfe["model"]
$model
Random Forest 

12973 samples
   23 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7162565  0.4137442
  12    0.7744137  0.5656213
  23    0.7654712  0.5530633

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 12.
rf_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.8087404 0.6354706 
print("Random Forest: NZV")
[1] "Random Forest: NZV"
#rf_nzv <- caret_train(iphoneNZV, test_dfNZV, 'rf', fitControl)
rf_nzv["model"]
$model
Random Forest 

12973 samples
   10 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7428480  0.4922026
   6    0.7373744  0.4878246
  10    0.7300894  0.4784915

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 2.
rf_nzv["post_resample"]
$post_resample
Accuracy    Kappa 
0.762982 0.535005 
print("Random Forest: COR")
[1] "Random Forest: COR"
#rf_cor <- caret_train(iphoneCOR, test_dfCOR, 'rf', fitControl)
rf_cor["model"]
$model
Random Forest 

12973 samples
   45 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11677, 11674, 11676, 11676, 11676, 11675, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.6930535  0.3494518
  23    0.7737582  0.5639659
  45    0.7652399  0.5521969

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 23.
rf_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.8077121 0.6331703 

Support Vector Machine (SVM) Model

print("SVM: Full Dataset")
[1] "SVM: Full Dataset"
#svm_train_full <- svm_train(df, test_df)
svm_train_full["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.01724138 

Number of Support Vectors:  6666
svm_train_full["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7128535 0.4155794 
print("SVM: RFE")
[1] "SVM: RFE"
#svm_train_rfe <- svm_train(iphoneRFE, test_dfRFE)
svm_train_rfe["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.04347826 

Number of Support Vectors:  6596
svm_train_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7285347 0.4529316 
print("SVM: NZV")
[1] "SVM: NZV"
#svm_train_nzv <- svm_train(iphoneNZV, test_dfNZV)
svm_train_nzv["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.1 

Number of Support Vectors:  6983
svm_train_nzv["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7259640 0.4518386 
print("SVM: COR")
[1] "SVM: COR"
#svm_train_cor <- svm_train(iphoneCOR, test_dfCOR)
svm_train_cor["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.02222222 

Number of Support Vectors:  6578
svm_train_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7095116 0.4071791 

K-nearest Neighbors (KNN) Model

print("KNN: Full Dataset")
[1] "KNN: Full Dataset"
#knn_train_full <- knn_train(df, test_df)
knn_train_full["model"]$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.6522778
Best kernel: optimal
Best k: 11
knn_train_full["post_resample"]
$post_resample
 Accuracy     Kappa 
0.3652956 0.2064675 
print("KNN: RFE")
[1] "KNN: RFE"
#knn_train_rfe <- knn_train(iphoneRFE, test_dfRFE)
knn_train_rfe["model"]$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.6505049
Best kernel: optimal
Best k: 11
knn_train_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.3632391 0.2036046 
print("KNN: NZV")
[1] "KNN: NZV"
#knn_train_nzv <- knn_train(iphoneNZV, test_dfNZV)
knn_train_nzv["model"]$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.6857319
Best kernel: optimal
Best k: 11
knn_train_nzv["post_resample"]
$post_resample
 Accuracy     Kappa 
0.3300771 0.1613920 
print("KNN: COR")
[1] "KNN: COR"
#knn_train_cor <- knn_train(iphoneCOR, test_dfCOR)
knn_train_cor["model"]$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.653357
Best kernel: optimal
Best k: 11
knn_train_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.3629820 0.2049409 

Models Performance

Grouped bar chart to evaluate model performance

Confusion Matrix comparison

Note: The KNN model showed such poor performance in the Accuracy and Kappa metrics that it was discarded from the analysis.

# Create confusion matrices for the three retained models (KNN was discarded).
#
# FIX: the original code was `predict(x["model"], test_df)$model`, which calls
# predict() on the one-element LIST returned by single-bracket `[` and then
# extracts `$model` from the *predictions*. `[[ ]]` extracts the fitted model
# itself, which is what predict() needs. The RFE-trained models are also
# evaluated against the matching RFE test split (same rows as test_df because
# every partition used set.seed(90210)).
iphone_cm_dt <- confusionMatrix(predict(dt_c50_rfe[["model"]], test_dfRFE), test_dfRFE$iphonesentiment)
plot_confusion_matrix(iphone_cm_dt, "C5.0")


iphone_cmsvm <- confusionMatrix(predict(svm_train_rfe[["model"]], test_dfRFE), test_dfRFE$iphonesentiment)
plot_confusion_matrix(iphone_cmsvm, "SVM")


iphone_cmRF <- confusionMatrix(predict(rf_rfe[["model"]], test_dfRFE), test_dfRFE$iphonesentiment)
plot_confusion_matrix(iphone_cmRF, "Random Forest")


print("C5.0 detail")
[1] "C5.0 detail"
iphone_cm_dt
Confusion Matrix and Statistics

          Reference
Prediction    0    1    2    3    4    5
         0  399    0    1    1    4    5
         1    0    0    0    0    0    0
         2    0    0   16    0    0    0
         3    1    1    1  234    1    4
         4    2    0    0    2  151    5
         5  186  116  118  119  275 2248

Overall Statistics
                                          
               Accuracy : 0.7835          
                 95% CI : (0.7703, 0.7964)
    No Information Rate : 0.5815          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5805          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity            0.6786  0.00000 0.117647  0.65730  0.35035   0.9938
Specificity            0.9967  1.00000 1.000000  0.99774  0.99740   0.5000
Pos Pred Value         0.9732      NaN 1.000000  0.96694  0.94375   0.7342
Neg Pred Value         0.9457  0.96992 0.969024  0.96656  0.92493   0.9831
Prevalence             0.1512  0.03008 0.034961  0.09152  0.11080   0.5815
Detection Rate         0.1026  0.00000 0.004113  0.06015  0.03882   0.5779
Detection Prevalence   0.1054  0.00000 0.004113  0.06221  0.04113   0.7871
Balanced Accuracy      0.8376  0.50000 0.558824  0.82752  0.67387   0.7469
print("\n-------------------------------------------------------------------------------")
[1] "\n-------------------------------------------------------------------------------"
print("RF detail")
[1] "RF detail"
iphone_cmRF
Confusion Matrix and Statistics

          Reference
Prediction    0    1    2    3    4    5
         0  422    0    0    0    2    4
         1    0    6    0    0    0    0
         2    0    1   30    0    0    0
         3    0    0    0  245    0    1
         4    1    0    0    0  186    0
         5  165  110  106  111  243 2257

Overall Statistics
                                        
               Accuracy : 0.8087        
                 95% CI : (0.796, 0.821)
    No Information Rate : 0.5815        
    P-Value [Acc > NIR] : < 2.2e-16     
                                        
                  Kappa : 0.6355        
                                        
 Mcnemar's Test P-Value : NA            

Statistics by Class:

                     Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity            0.7177 0.051282 0.220588  0.68820  0.43155   0.9978
Specificity            0.9982 1.000000 0.999734  0.99972  0.99971   0.5485
Pos Pred Value         0.9860 1.000000 0.967742  0.99593  0.99465   0.7543
Neg Pred Value         0.9521 0.971421 0.972532  0.96954  0.93384   0.9944
Prevalence             0.1512 0.030077 0.034961  0.09152  0.11080   0.5815
Detection Rate         0.1085 0.001542 0.007712  0.06298  0.04781   0.5802
Detection Prevalence   0.1100 0.001542 0.007969  0.06324  0.04807   0.7692
Balanced Accuracy      0.8579 0.525641 0.610161  0.84396  0.71563   0.7732
print("\n-------------------------------------------------------------------------------")
[1] "\n-------------------------------------------------------------------------------"
print("SVM detail")
[1] "SVM detail"
iphone_cmsvm
Confusion Matrix and Statistics

          Reference
Prediction    0    1    2    3    4    5
         0  349    1    2    4    9   18
         1    0    0    0    0    0    0
         2    0    0    1    0    0    0
         3    2    1   17  113    2    9
         4    1    0    0    0  138    2
         5  236  115  116  239  282 2233

Overall Statistics
                                          
               Accuracy : 0.7285          
                 95% CI : (0.7143, 0.7425)
    No Information Rate : 0.5815          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.4529          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 0 Class: 1  Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity           0.59354  0.00000 0.0073529  0.31742  0.32019   0.9872
Specificity           0.98970  1.00000 1.0000000  0.99123  0.99913   0.3931
Pos Pred Value        0.91123      NaN 1.0000000  0.78472  0.97872   0.6933
Neg Pred Value        0.93185  0.96992 0.9652867  0.93513  0.92185   0.9567
Prevalence            0.15116  0.03008 0.0349614  0.09152  0.11080   0.5815
Detection Rate        0.08972  0.00000 0.0002571  0.02905  0.03548   0.5740
Detection Prevalence  0.09846  0.00000 0.0002571  0.03702  0.03625   0.8280
Balanced Accuracy     0.79162  0.50000 0.5036765  0.65432  0.65966   0.6901

Model Selection

The Random Forest model trained on the Recursive Feature Elimination dataset achieved the highest Accuracy. It also showed the best balanced accuracy in the confusion matrix analysis. However, caution is warranted, since all the models tend to classify observations into the “Very Positive (5)” class.

Large dataset prediction

Preprocessing the large dataset

str(large_df)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   59139 obs. of  24 variables:
 $ iphone         : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleandroid  : num  7 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsunggalaxy  : num  0 0 0 0 0 1 0 0 0 0 ...
 $ iphonedisunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedispos   : num  0 0 0 0 1 0 0 0 0 0 ...
 $ iphonecampos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcphone       : num  0 0 0 0 0 0 0 0 2 0 ...
 $ sonyxperia     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccampos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperpos   : num  0 0 0 0 9 0 0 0 0 0 ...
 $ htccamneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ ios            : num  0 0 1 1 0 0 0 0 0 0 ...
 $ sonyperpos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdispos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperpos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungperpos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonesentiment: Factor w/ 6 levels "0","1","2","3",..: 3 1 1 1 1 1 1 1 1 1 ...

Apply Model on the large dataset

    0     1     2     3     4     5 
39749     0  3757  1756    94 13783 

iPhone Sentiments Results

# Count the predicted occurrences of each sentiment class (0-5)
iphonesentiment <- summary(iphone_predicted)

# Attach human-readable labels for the six sentiment levels,
# then keep only the label and count columns for plotting
iphonesentiment_df <- data.frame(
  "Categorie" = c(
    "Very Negative", "Negative", "Somewhat Negative",
    "Somewhat Positive", "Positive", "Very Positive"
  ),
  iphonesentiment
)
iphone_sent_data <- iphonesentiment_df[, c("Categorie", "iphonesentiment")]

# Pie chart of the sentiment distribution; hide axis chrome,
# which is meaningless for a pie trace
iphone_pie <- iphone_sent_data %>%
  plot_ly(labels = ~Categorie, values = ~iphonesentiment, type = "pie") %>%
  layout(
    title = "iPhone Sentiment - Nov 2019",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )

iphone_pie

NA

Galaxy analysis

Load training dataset for Galaxy labeled sentiment.

Explore structure and descriptive statistics from the training datasets

Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame':    12911 obs. of  59 variables:
 $ iphone         : num  1 1 1 0 1 2 1 1 4 1 ...
 $ samsunggalaxy  : num  0 0 1 0 0 0 0 0 0 0 ...
 $ sonyxperia     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokialumina    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcphone       : num  0 0 0 1 0 0 0 0 0 0 ...
 $ ios            : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleandroid  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecampos   : num  0 0 1 0 0 1 0 0 0 0 ...
 $ samsungcampos  : num  0 0 1 0 0 0 0 0 0 0 ...
 $ sonycampos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacampos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccampos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungcamneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonycamneg     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacamneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccamneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungcamunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonycamunc     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacamunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccamunc      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedispos   : num  0 1 0 0 0 0 2 0 0 0 ...
 $ samsungdispos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydispos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadispos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdispos      : num  0 0 0 1 0 0 0 0 0 0 ...
 $ iphonedisneg   : num  0 1 0 0 0 0 0 0 0 0 ...
 $ samsungdisneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydisneg     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadisneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisunc   : num  0 1 0 0 0 0 0 0 0 0 ...
 $ samsungdisunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydisunc     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadisunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisunc      : num  0 0 0 1 0 0 0 0 0 0 ...
 $ iphoneperpos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungperpos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperpos     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperpos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperpos      : num  0 0 0 1 0 0 0 0 0 0 ...
 $ iphoneperneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungperneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperneg     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperneg      : num  0 0 0 1 0 0 0 0 0 0 ...
 $ iphoneperunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungperunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperunc     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperunc      : num  0 0 0 1 0 0 0 0 0 0 ...
 $ iosperpos      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperpos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperneg      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperunc      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ galaxysentiment: num  5 3 3 0 1 0 3 5 5 5 ...
 - attr(*, "spec")=
  .. cols(
  ..   iphone = col_double(),
  ..   samsunggalaxy = col_double(),
  ..   sonyxperia = col_double(),
  ..   nokialumina = col_double(),
  ..   htcphone = col_double(),
  ..   ios = col_double(),
  ..   googleandroid = col_double(),
  ..   iphonecampos = col_double(),
  ..   samsungcampos = col_double(),
  ..   sonycampos = col_double(),
  ..   nokiacampos = col_double(),
  ..   htccampos = col_double(),
  ..   iphonecamneg = col_double(),
  ..   samsungcamneg = col_double(),
  ..   sonycamneg = col_double(),
  ..   nokiacamneg = col_double(),
  ..   htccamneg = col_double(),
  ..   iphonecamunc = col_double(),
  ..   samsungcamunc = col_double(),
  ..   sonycamunc = col_double(),
  ..   nokiacamunc = col_double(),
  ..   htccamunc = col_double(),
  ..   iphonedispos = col_double(),
  ..   samsungdispos = col_double(),
  ..   sonydispos = col_double(),
  ..   nokiadispos = col_double(),
  ..   htcdispos = col_double(),
  ..   iphonedisneg = col_double(),
  ..   samsungdisneg = col_double(),
  ..   sonydisneg = col_double(),
  ..   nokiadisneg = col_double(),
  ..   htcdisneg = col_double(),
  ..   iphonedisunc = col_double(),
  ..   samsungdisunc = col_double(),
  ..   sonydisunc = col_double(),
  ..   nokiadisunc = col_double(),
  ..   htcdisunc = col_double(),
  ..   iphoneperpos = col_double(),
  ..   samsungperpos = col_double(),
  ..   sonyperpos = col_double(),
  ..   nokiaperpos = col_double(),
  ..   htcperpos = col_double(),
  ..   iphoneperneg = col_double(),
  ..   samsungperneg = col_double(),
  ..   sonyperneg = col_double(),
  ..   nokiaperneg = col_double(),
  ..   htcperneg = col_double(),
  ..   iphoneperunc = col_double(),
  ..   samsungperunc = col_double(),
  ..   sonyperunc = col_double(),
  ..   nokiaperunc = col_double(),
  ..   htcperunc = col_double(),
  ..   iosperpos = col_double(),
  ..   googleperpos = col_double(),
  ..   iosperneg = col_double(),
  ..   googleperneg = col_double(),
  ..   iosperunc = col_double(),
  ..   googleperunc = col_double(),
  ..   galaxysentiment = col_double()
  .. )
[1] "Number of NA values:  0"

Labeled sentiment distribution.

Feature selection methods

Features Correlation

Explore correlation between all variables:

[1] "Number of original features:  59"
[1] "Number of features after cleanup:  45"
                      iphone samsunggalaxy   sonyxperia   nokialumina googleandroid iphonecampos samsungcampos    sonycampos   nokiacampos    htccampos iphonecamneg
iphone           1.000000000  0.0159758824 -0.012286712 -0.0130113124  9.911322e-02  0.076295824   0.052992160 -4.962867e-03 -0.0081977557  0.020662226   0.48683867
samsunggalaxy    0.015975882  1.0000000000  0.365927086 -0.0061241775  2.365132e-01  0.030226679   0.251187527  1.431587e-01 -0.0004208775  0.064727423   0.12658077
sonyxperia      -0.012286712  0.3659270858  1.000000000 -0.0063682064 -1.831529e-02  0.004342369   0.047772377  3.956986e-01 -0.0042439494  0.015445636  -0.00659557
nokialumina     -0.013011312 -0.0061241775 -0.006368206  1.0000000000 -1.135554e-03  0.029849094   0.009313174 -2.745737e-03  0.7004134278  0.021305512   0.06335817
googleandroid    0.099113220  0.2365132133 -0.018315293 -0.0011355541  1.000000e+00  0.104609347   0.316091592  6.317932e-05  0.0032731478  0.148268865   0.39278639
iphonecampos     0.076295824  0.0302266793  0.004342369  0.0298490944  1.046093e-01  1.000000000   0.061825315  4.283929e-02  0.0308391601  0.623980191   0.54125534
samsungcampos    0.052992160  0.2511875267  0.047772377  0.0093131738  3.160916e-01  0.061825315   1.000000000  1.383410e-01  0.0148804509  0.089190057   0.20672704
sonycampos      -0.004962867  0.1431586761  0.395698570 -0.0027457373  6.317932e-05  0.042839287   0.138340996  1.000000e+00 -0.0018298355  0.055581987   0.01408191
nokiacampos     -0.008197756 -0.0004208775 -0.004243949  0.7004134278  3.273148e-03  0.030839160   0.014880451 -1.829835e-03  1.0000000000  0.017769641   0.05346503
htccampos        0.020662226  0.0647274234  0.015445636  0.0213055121  1.482689e-01  0.623980191   0.089190057  5.558199e-02  0.0177696412  1.000000000   0.20681456
iphonecamneg     0.486838671  0.1265807721 -0.006595570  0.0633581661  3.927864e-01  0.541255338   0.206727037  1.408191e-02  0.0534650264  0.206814560   1.00000000
samsungcamneg    0.132278137  0.3431998318 -0.004337150  0.0095239562  7.118048e-01  0.117535498   0.609595152  3.385279e-02  0.0160323804  0.171152386   0.46936281
sonycamneg      -0.001986513  0.0318343254  0.345739434 -0.0012347698  1.353562e-02  0.020010336   0.054051800  4.187958e-01 -0.0008228848  0.016203898   0.06571066
htccamneg        0.096622616  0.2233020453 -0.012283129  0.0372938000  5.637609e-01  0.206429162   0.296198758  1.415598e-02  0.0305337980  0.450658200   0.50737953
iphonecamunc     0.753152812 -0.0105756133 -0.008327405  0.0162333473  4.302227e-02  0.472868820   0.028294931  1.410885e-02  0.0186384662  0.163268790   0.64353327
samsungcamunc    0.068032597  0.3150353606  0.054648136  0.0410932344  3.932418e-01  0.075985545   0.814604639  1.494158e-01  0.0582691439  0.112022472   0.26430805
sonycamunc      -0.003312885  0.1001951538  0.375616052 -0.0018820683 -6.458182e-03  0.026713461   0.090161420  5.041064e-01 -0.0012542625  0.033332634   0.03553190
htccamunc        0.023494448  0.0720148991  0.012459032  0.0361538036  1.664470e-01  0.321226556   0.102951218  5.015381e-02  0.0286030395  0.656374073   0.23805823
iphonedispos     0.050588211 -0.0068241879 -0.018810355  0.0283753991  6.721825e-02  0.272020461   0.038917295  1.727241e-02  0.0292639996  0.129513283   0.26071081
sonydispos      -0.003914300  0.0581107226  0.248496900 -0.0014962654 -1.519812e-03  0.015963755   0.052294667  3.894317e-01 -0.0009971527  0.019414697   0.01634326
nokiadispos     -0.007942698  0.0102400922 -0.003783108  0.6502512874 -4.187951e-03  0.026336481   0.038418859 -1.631138e-03  0.8708580895  0.014635993   0.05010051
htcdispos        0.006425287  0.0245791320  0.002804346  0.0105571174  5.761226e-02  0.067175123   0.032484523  1.468756e-02  0.0088510422  0.140080137   0.05507328
iphonedisneg     0.172609774  0.0180385513 -0.013498805  0.0237839324  1.221613e-01  0.148001721   0.065558511  7.323172e-03  0.0222069463  0.040840392   0.34562123
htcdisneg        0.078832554  0.1896615395 -0.002079164  0.0443578021  4.486681e-01  0.109666214   0.239485974  3.889049e-02  0.0359506128  0.270154321   0.37375644
iphonedisunc     0.250368664 -0.0279505858 -0.018136040  0.0026849101  1.789794e-02  0.187795121   0.012245921  7.127826e-03  0.0050196241  0.052940070   0.29831140
sonydisunc      -0.004557554  0.0589819631  0.293355488 -0.0013687069 -4.696619e-03  0.018572316   0.064983719  3.858238e-01 -0.0009121443  0.023850936   0.03627401
nokiadisunc     -0.007329545  0.0146600770 -0.003242367  0.4913298135 -3.589343e-03  0.009618139   0.046869519 -1.397990e-03  0.7956704142  0.006693764   0.01891563
htcdisunc        0.022020409  0.0714041218  0.009337082  0.0211081991  1.471779e-01  0.155843934   0.086244790  5.358163e-02  0.0163379550  0.386183683   0.16002457
iphoneperpos    -0.011451806 -0.0029781969 -0.029083547  0.0335940854  1.070325e-01  0.348601360   0.056420038  7.997179e-03  0.0342884696  0.243836267   0.25598508
samsungperpos    0.047500473  0.2425264791  0.019924925  0.0174552954  2.705687e-01  0.044969832   0.794039192  4.376105e-02  0.0257102240  0.067464470   0.17263500
sonyperpos      -0.006343534  0.0652618116  0.263129425 -0.0019005140  9.389863e-04  0.012722445   0.043407123  3.790633e-01 -0.0012665552  0.011545140   0.01808213
nokiaperpos     -0.010160149  0.0018261992 -0.004619320  0.7374555536 -2.315855e-03  0.021195419   0.021609551 -1.991681e-03  0.8874493622  0.012838659   0.03940870
htcperpos        0.028089577  0.0879891195  0.003950125  0.0391281410  2.096614e-01  0.286933372   0.114619592  1.872501e-02  0.0264063815  0.585917433   0.20513127
iphoneperneg     0.010518187  0.0465117438 -0.028702428  0.0338966995  2.136740e-01  0.150869485   0.113258841  7.059611e-03  0.0308754722  0.074613471   0.30706117
samsungperneg    0.106749860  0.3038066396 -0.001954085  0.0173346109  5.583978e-01  0.092098343   0.547344221  3.511480e-02  0.0261948975  0.133444763   0.36201131
sonyperneg      -0.003602071  0.0099727656  0.122561372 -0.0009524197  5.650718e-03  0.007042108   0.019390524  1.872270e-01 -0.0006347189  0.003728334   0.02059739
htcperneg        0.069886518  0.1786390850 -0.012097255  0.0500551081  4.338376e-01  0.109208496   0.231575898  9.464047e-03  0.0337776182  0.290115236   0.34827876
iphoneperunc    -0.017262453 -0.0175850522 -0.028945578  0.0203441781  5.723250e-02  0.186508454   0.031457128  5.526194e-03  0.0216900158  0.059597922   0.21594149
samsungperunc    0.043282408  0.1838181677  0.005806506  0.0353135388  2.221077e-01  0.039569012   0.486675117  4.517428e-02  0.0493032042  0.057295111   0.13917577
sonyperunc      -0.003120365  0.0322934086  0.144721901 -0.0011523967 -3.954366e-03  0.017350460   0.049085877  3.507086e-01 -0.0007679891  0.014065561   0.03376090
htcperunc        0.009997149  0.0443416350 -0.005960459  0.0237591934  1.097906e-01  0.066921823   0.060374395  1.164314e-02  0.0172626278  0.253226653   0.11462812
iosperpos       -0.019640311 -0.0058791543 -0.011047769  0.0306965276 -1.675981e-02 -0.003970216   0.102605291 -3.016055e-03  0.1031108218 -0.006118699  -0.01222786
googleperpos     0.109452690  0.2462351542 -0.008498579  0.0064950782  6.389390e-01  0.117983631   0.298652253  6.980536e-03  0.0115514955  0.163213490   0.41789462
googleperunc     0.062893979  0.1423566132 -0.007938922  0.0079866214  3.722011e-01  0.073053737   0.159368133 -3.422972e-03  0.0125098136  0.100072177   0.24141023
galaxysentiment  0.001486419 -0.3452572476 -0.219816736 -0.0524690374 -1.867333e-01 -0.031921433  -0.112302827 -7.633660e-02 -0.0317719348 -0.120156303  -0.09380504
                samsungcamneg    sonycamneg    htccamneg iphonecamunc samsungcamunc    sonycamunc    htccamunc iphonedispos    sonydispos   nokiadispos    htcdispos
iphone            0.132278137 -0.0019865130  0.096622616  0.753152812    0.06803260 -0.0033128854  0.023494448  0.050588211 -0.0039143004 -0.0079426985  0.006425287
samsunggalaxy     0.343199832  0.0318343254  0.223302045 -0.010575613    0.31503536  0.1001951538  0.072014899 -0.006824188  0.0581107226  0.0102400922  0.024579132
sonyxperia       -0.004337150  0.3457394341 -0.012283129 -0.008327405    0.05464814  0.3756160524  0.012459032 -0.018810355  0.2484969002 -0.0037831085  0.002804346
nokialumina       0.009523956 -0.0012347698  0.037293800  0.016233347    0.04109323 -0.0018820683  0.036153804  0.028375399 -0.0014962654  0.6502512874  0.010557117
googleandroid     0.711804841  0.0135356245  0.563760872  0.043022274    0.39324183 -0.0064581819  0.166446985  0.067218249 -0.0015198122 -0.0041879508  0.057612262
iphonecampos      0.117535498  0.0200103357  0.206429162  0.472868820    0.07598554  0.0267134605  0.321226556  0.272020461  0.0159637550  0.0263364813  0.067175123
samsungcampos     0.609595152  0.0540518003  0.296198758  0.028294931    0.81460464  0.0901614198  0.102951218  0.038917295  0.0522946669  0.0384188592  0.032484523
sonycampos        0.033852793  0.4187958018  0.014155977  0.014108853    0.14941578  0.5041063856  0.050153807  0.017272409  0.3894316948 -0.0016311378  0.014687562
nokiacampos       0.016032380 -0.0008228848  0.030533798  0.018638466    0.05826914 -0.0012542625  0.028603040  0.029264000 -0.0009971527  0.8708580895  0.008851042
htccampos         0.171152386  0.0162038977  0.450658200  0.163268790    0.11202247  0.0333326335  0.656374073  0.129513283  0.0194146971  0.0146359935  0.140080137
iphonecamneg      0.469362814  0.0657106556  0.507379531  0.643533267    0.26430805  0.0355318999  0.238058232  0.260710811  0.0163432624  0.0501005147  0.055073277
samsungcamneg     1.000000000  0.0723582129  0.661845110  0.061626704    0.70657361  0.0523458964  0.200606311  0.079504956  0.0378475866  0.0423789265  0.065611461
sonycamneg        0.072358213  1.0000000000  0.038046004  0.025065974    0.12911931  0.6195594283  0.036731446  0.024876690  0.3401151094 -0.0007335296  0.015151205
htccamneg         0.661845110  0.0380460043  1.000000000  0.112019364    0.37639034  0.0257262549  0.661600909  0.097655204  0.0210901306  0.0257479718  0.134664817
iphonecamunc      0.061626704  0.0250659745  0.112019364  1.000000000    0.05527974  0.0457067259  0.171512337  0.208010744  0.0106923233  0.0156260511  0.023134483
samsungcamunc     0.706573610  0.1291193050  0.376390343  0.055279740    1.00000000  0.2763878776  0.176456371  0.066626391  0.0996402101  0.1415037274  0.052301499
sonycamunc        0.052345896  0.6195594283  0.025726255  0.045706726    0.27638788  1.0000000000  0.103149644  0.023371459  0.4286878754 -0.0011180650  0.016941863
htccamunc         0.200606311  0.0367314462  0.661600909  0.171512337    0.17645637  0.1031496443  1.000000000  0.093329703  0.0334978533  0.0245381147  0.143998293
iphonedispos      0.079504956  0.0248766905  0.097655204  0.208010744    0.06662639  0.0233714593  0.093329703  1.000000000  0.0221560903  0.0345644301  0.041786692
sonydispos        0.037847587  0.3401151094  0.021090131  0.010692323    0.09964021  0.4286878754  0.033497853  0.022156090  1.0000000000 -0.0008888742  0.014680260
nokiadispos       0.042378926 -0.0007335296  0.025747972  0.015626051    0.14150373 -0.0011180650  0.024538115  0.034564430 -0.0008888742  1.0000000000  0.007596963
htcdispos         0.065611461  0.0151512047  0.134664817  0.023134483    0.05230150  0.0169418630  0.143998293  0.041786692  0.0146802595  0.0075969626  1.000000000
iphonedisneg      0.152305538  0.0400996898  0.150847009  0.252535247    0.09952661  0.0252735063  0.067282465  0.868720357  0.0158660097  0.0260975626  0.034490470
htcdisneg         0.522947200  0.0829721860  0.728174027  0.079731791    0.35077187  0.0647763162  0.489988487  0.163302550  0.0537794451  0.0314364879  0.179899738
iphonedisunc      0.028940257  0.0271650147  0.044440007  0.360715941    0.03971173  0.0310306676  0.058952985  0.882939762  0.0125436654  0.0055715471  0.030931102
sonydisunc        0.063488466  0.5790540898  0.037791326  0.029373239    0.19501211  0.7244443648  0.072326128  0.026630266  0.7733231222 -0.0008130966  0.019094765
nokiadisunc       0.051889331 -0.0006286820  0.011918755  0.005717685    0.17091871 -0.0009582535  0.011904286  0.009584988 -0.0007618222  0.8846578244  0.003487942
htcdisunc         0.163333108  0.0752989448  0.474574337  0.085783093    0.18802277  0.1073498560  0.605571862  0.171737497  0.0562511116  0.0140427807  0.201602203
iphoneperpos      0.131846303  0.0267678791  0.161081445  0.188991734    0.09464301  0.0178586165  0.158034630  0.659821399  0.0094872194  0.0377035865  0.053898665
samsungperpos     0.590673242  0.0591238698  0.247191782  0.027182576    0.77002761  0.0550258992  0.096468857  0.092635710  0.0371239009  0.0637431699  0.064549101
sonyperpos        0.037976843  0.5038519746  0.012444880  0.008284567    0.08947057  0.3865275071  0.023473868  0.010662984  0.4973628888 -0.0011290228  0.008152522
nokiaperpos       0.023494568 -0.0008956676  0.022950647  0.011469184    0.08269580 -0.0013652000  0.024847326  0.024884472 -0.0010853492  0.8594811452  0.007052720
htcperpos         0.242285252  0.0227371438  0.550520059  0.091857322    0.17828345  0.0266339486  0.652164693  0.124960320  0.0181760550  0.0239256466  0.170629759
iphoneperneg      0.260478673  0.0448634452  0.247144175  0.111406373    0.16640430  0.0159134207  0.109169341  0.637549393  0.0061659170  0.0314458957  0.046924817
samsungperneg     0.825413224  0.0724121035  0.510198709  0.055077108    0.68707277  0.0522990421  0.174449277  0.142340126  0.0373592535  0.0658140036  0.099978540
sonyperneg        0.025453573  0.4654207125  0.008815092  0.004863074    0.04613737  0.2128897766  0.009458497  0.004756778  0.1698345260 -0.0005657962  0.004143993
htcperneg         0.511819454  0.0281035690  0.756238655  0.070405135    0.32810967  0.0194387547  0.559545613  0.123731381  0.0168009689  0.0306008837  0.155288679
iphoneperunc      0.070284556  0.0268405586  0.100213759  0.173075423    0.07450152  0.0251337436  0.113378069  0.665294228  0.0122575802  0.0183932842  0.045415445
samsungperunc     0.389700324  0.0848666908  0.198978258  0.033407544    0.60162206  0.0782549244  0.106729580  0.157307185  0.0490246723  0.1186802557  0.105329698
sonyperunc        0.062291749  0.6167570374  0.030471304  0.023247994    0.13836230  0.5465862762  0.044407034  0.025636756  0.3239207897 -0.0006845949  0.013749083
htcperunc         0.122074971  0.0262945924  0.425499338  0.057106935    0.12321338  0.0280088185  0.601226027  0.091618846  0.0168874444  0.0144910357  0.135199606
iosperpos         0.110025325 -0.0012905333 -0.010969479 -0.004955082    0.12954259 -0.0007133983 -0.007871624  0.020291829  0.0257158843  0.0795296120 -0.001143231
googleperpos      0.658628613  0.0208928764  0.579045749  0.076935372    0.41900390 -0.0037606539  0.223489645  0.165883144  0.0002494674 -0.0024386792  0.118363114
googleperunc      0.342103059 -0.0015393253  0.334138609  0.058156494    0.27047941 -0.0023462799  0.162562910  0.180008358 -0.0018653187 -0.0015214970  0.124039759
galaxysentiment  -0.182639799 -0.0096129035 -0.222750052 -0.010329492   -0.13602847 -0.0478723363 -0.148868149  0.009145782 -0.0346666196 -0.0244783494 -0.060638338
                iphonedisneg     htcdisneg iphonedisunc    sonydisunc   nokiadisunc    htcdisunc iphoneperpos samsungperpos    sonyperpos   nokiaperpos    htcperpos
iphone           0.172609774  0.0788325543  0.250368664 -0.0045575542 -0.0073295453  0.022020409 -0.011451806    0.04750047 -0.0063435345 -0.0101601493  0.028089577
samsunggalaxy    0.018038551  0.1896615395 -0.027950586  0.0589819631  0.0146600770  0.071404122 -0.002978197    0.24252648  0.0652618116  0.0018261992  0.087989119
sonyxperia      -0.013498805 -0.0020791636 -0.018136040  0.2933554885 -0.0032423668  0.009337082 -0.029083547    0.01992493  0.2631294247 -0.0046193197  0.003950125
nokialumina      0.023783932  0.0443578021  0.002684910 -0.0013687069  0.4913298135  0.021108199  0.033594085    0.01745530 -0.0019005140  0.7374555536  0.039128141
googleandroid    0.122161278  0.4486681246  0.017897938 -0.0046966192 -0.0035893426  0.147177949  0.107032469    0.27056867  0.0009389863 -0.0023158553  0.209661399
iphonecampos     0.148001721  0.1096662143  0.187795121  0.0185723164  0.0096181394  0.155843934  0.348601360    0.04496983  0.0127224448  0.0211954185  0.286933372
samsungcampos    0.065558511  0.2394859740  0.012245921  0.0649837193  0.0468695194  0.086244790  0.056420038    0.79403919  0.0434071228  0.0216095512  0.114619592
sonycampos       0.007323172  0.0388904870  0.007127826  0.3858238407 -0.0013979898  0.053581627  0.007997179    0.04376105  0.3790633475 -0.0019916814  0.018725011
nokiacampos      0.022206946  0.0359506128  0.005019624 -0.0009121443  0.7956704142  0.016337955  0.034288470    0.02571022 -0.0012665552  0.8874493622  0.026406382
htccampos        0.040840392  0.2701543215  0.052940070  0.0238509364  0.0066937639  0.386183683  0.243836267    0.06746447  0.0115451401  0.0128386590  0.585917433
iphonecamneg     0.345621229  0.3737564440  0.298311398  0.0362740143  0.0189156292  0.160024569  0.255985078    0.17263500  0.0180821258  0.0394087035  0.205131267
samsungcamneg    0.152305538  0.5229471998  0.028940257  0.0634884660  0.0518893313  0.163333108  0.131846303    0.59067324  0.0379768430  0.0234945681  0.242285252
sonycamneg       0.040099690  0.0829721860  0.027165015  0.5790540898 -0.0006286820  0.075298945  0.026767879    0.05912387  0.5038519746 -0.0008956676  0.022737144
htccamneg        0.150847009  0.7281740266  0.044440007  0.0377913258  0.0119187549  0.474574337  0.161081445    0.24719178  0.0124448803  0.0229506466  0.550520059
iphonecamunc     0.252535247  0.0797317910  0.360715941  0.0293732389  0.0057176852  0.085783093  0.188991734    0.02718258  0.0082845669  0.0114691841  0.091857322
samsungcamunc    0.099526606  0.3507718687  0.039711728  0.1950121112  0.1709187126  0.188022768  0.094643007    0.77002761  0.0894705738  0.0826958050  0.178283450
sonycamunc       0.025273506  0.0647763162  0.031030668  0.7244443648 -0.0009582535  0.107349856  0.017858616    0.05502590  0.3865275071 -0.0013652000  0.026633949
htccamunc        0.067282465  0.4899884870  0.058952985  0.0723261276  0.0119042862  0.605571862  0.158034630    0.09646886  0.0234738684  0.0248473261  0.652164693
iphonedispos     0.868720357  0.1633025503  0.882939762  0.0266302657  0.0095849876  0.171737497  0.659821399    0.09263571  0.0106629843  0.0248844725  0.124960320
sonydispos       0.015866010  0.0537794451  0.012543665  0.7733231222 -0.0007618222  0.056251112  0.009487219    0.03712390  0.4973628888 -0.0010853492  0.018176055
nokiadispos      0.026097563  0.0314364879  0.005571547 -0.0008130966  0.8846578244  0.014042781  0.037703586    0.06374317 -0.0011290228  0.8594811452  0.023925647
htcdispos        0.034490470  0.1798997375  0.030931102  0.0190947646  0.0034879421  0.201602203  0.053898665    0.06454910  0.0081525224  0.0070527199  0.170629759
iphonedisneg     1.000000000  0.2219404332  0.880107029  0.0306654489  0.0072757236  0.158704927  0.531155368    0.11680182  0.0082444957  0.0186919893  0.100317942
htcdisneg        0.221940433  1.0000000000  0.136580523  0.0872062027  0.0149978620  0.786143615  0.205140392    0.37187083  0.0342804173  0.0300652657  0.576015591
iphonedisunc     0.880107029  0.1365805231  1.000000000  0.0376366646  0.0011119755  0.170364518  0.553950753    0.07953309  0.0050039925  0.0023431972  0.084441901
sonydisunc       0.030665449  0.0872062027  0.037636665  1.0000000000 -0.0006968759  0.127130140  0.014220228    0.05363224  0.4291960132 -0.0009928219  0.029316556
nokiadisunc      0.007275724  0.0149978620  0.001111975 -0.0006968759  1.0000000000  0.006621782  0.010528207    0.07725968 -0.0009676450  0.8155971857  0.012944834
htcdisunc        0.158704927  0.7861436147  0.170364518  0.1271301402  0.0066217822  1.000000000  0.205011563    0.28855496  0.0386061069  0.0150922661  0.686086502
iphoneperpos     0.531155368  0.2051403919  0.553950753  0.0142202285  0.0105282074  0.205011563  1.000000000    0.14156677  0.0300859020  0.0289079140  0.208630091
samsungperpos    0.116801822  0.3718708282  0.079533087  0.0536322440  0.0772596832  0.288554961  0.141566772    1.00000000  0.0665326710  0.0367693433  0.200644278
sonyperpos       0.008244496  0.0342804173  0.005003992  0.4291960132 -0.0009676450  0.038606107  0.030085902    0.06653267  1.0000000000 -0.0013785799  0.032050307
nokiaperpos      0.018691989  0.0300652657  0.002343197 -0.0009928219  0.8155971857  0.015092266  0.028907914    0.03676934 -0.0013785799  1.0000000000  0.034642769
htcperpos        0.100317942  0.5760155913  0.084441901  0.0293165561  0.0129448340  0.686086502  0.208630091    0.20064428  0.0320503067  0.0346427690  1.000000000
iphoneperneg     0.641028889  0.3044839313  0.563691874  0.0121188710  0.0105733165  0.202481428  0.793886713    0.18937327  0.0293769682  0.0239879988  0.161017008
samsungperneg    0.195684738  0.6231598574  0.111738270  0.0572125973  0.0799428428  0.405749722  0.212807435    0.80217656  0.0449504380  0.0376495370  0.317036864
sonyperneg       0.007243647  0.0233672927  0.002930222  0.1848570209 -0.0004849237  0.020091072  0.018611143    0.02447781  0.8014976195 -0.0006908587  0.005950830
htcperneg        0.173327375  0.8435392166  0.095575003  0.0283886470  0.0166239994  0.677341540  0.183633348    0.32354097  0.0102755695  0.0445193996  0.715453696
iphoneperunc     0.570373895  0.2251199253  0.623368394  0.0240732544  0.0080370774  0.248031792  0.790788386    0.14087387  0.0188198511  0.0144588959  0.142440001
samsungperunc    0.171119626  0.5358334913  0.157224860  0.0895545825  0.1431371747  0.538900501  0.200057561    0.77725127  0.0501597829  0.0697453289  0.299245835
sonyperunc       0.024816669  0.0701301831  0.022979295  0.4753187875 -0.0005867418  0.082426524  0.035977140    0.05511066  0.7362796362 -0.0008359165  0.023715438
htcperunc        0.096483023  0.5507719471  0.092843375  0.0358684435  0.0073259556  0.721315436  0.123955167    0.18923727  0.0123505505  0.0163745389  0.849830199
iosperpos        0.015601144  0.0002663561  0.024090992  0.0146658890  0.1020280212  0.006410506  0.211681423    0.27425435  0.0058395486  0.0849337440 -0.002810574
googleperpos     0.218866627  0.7062400990  0.133064557 -0.0027348811 -0.0020901045  0.483106809  0.241751221    0.44438563  0.0077913188  0.0020368901  0.380455595
googleperunc     0.204713023  0.6451038643  0.172537052 -0.0017062980 -0.0013040206  0.593592947  0.239047349    0.42762213 -0.0023692751  0.0031320281  0.368496557
galaxysentiment -0.003520125 -0.1905757250  0.019354502 -0.0309999101 -0.0232864672 -0.132600148  0.024619007   -0.07743060 -0.0233561418 -0.0358926611 -0.175447029
                iphoneperneg samsungperneg    sonyperneg    htcperneg iphoneperunc samsungperunc    sonyperunc     htcperunc     iosperpos  googleperpos googleperunc
iphone           0.010518187   0.106749860 -3.602071e-03  0.069886518 -0.017262453   0.043282408 -0.0031203655  0.0099971494 -1.964031e-02  0.1094526897  0.062893979
samsunggalaxy    0.046511744   0.303806640  9.972766e-03  0.178639085 -0.017585052   0.183818168  0.0322934086  0.0443416350 -5.879154e-03  0.2462351542  0.142356613
sonyxperia      -0.028702428  -0.001954085  1.225614e-01 -0.012097255 -0.028945578   0.005806506  0.1447219008 -0.0059604585 -1.104777e-02 -0.0084985788 -0.007938922
nokialumina      0.033896699   0.017334611 -9.524197e-04  0.050055108  0.020344178   0.035313539 -0.0011523967  0.0237591934  3.069653e-02  0.0064950782  0.007986621
googleandroid    0.213673996   0.558397782  5.650718e-03  0.433837615  0.057232499   0.222107728 -0.0039543662  0.1097905831 -1.675981e-02  0.6389390017  0.372201059
iphonecampos     0.150869485   0.092098343  7.042108e-03  0.109208496  0.186508454   0.039569012  0.0173504603  0.0669218227 -3.970216e-03  0.1179836313  0.073053737
samsungcampos    0.113258841   0.547344221  1.939052e-02  0.231575898  0.031457128   0.486675117  0.0490858768  0.0603743949  1.026053e-01  0.2986522534  0.159368133
sonycampos       0.007059611   0.035114801  1.872270e-01  0.009464047  0.005526194   0.045174276  0.3507086050  0.0116431374 -3.016055e-03  0.0069805360 -0.003422972
nokiacampos      0.030875472   0.026194897 -6.347189e-04  0.033777618  0.021690016   0.049303204 -0.0007679891  0.0172626278  1.031108e-01  0.0115514955  0.012509814
htccampos        0.074613471   0.133444763  3.728334e-03  0.290115236  0.059597922   0.057295111  0.0140655611  0.2532266531 -6.118699e-03  0.1632134901  0.100072177
iphonecamneg     0.307061170   0.362011315  2.059739e-02  0.348278763  0.215941489   0.139175768  0.0337608982  0.1146281157 -1.222786e-02  0.4178946180  0.241410234
samsungcamneg    0.260478673   0.825413224  2.545357e-02  0.511819454  0.070284556   0.389700324  0.0622917487  0.1220749713  1.100253e-01  0.6586286127  0.342103059
sonycamneg       0.044863445   0.072412103  4.654207e-01  0.028103569  0.026840559   0.084866691  0.6167570374  0.0262945924 -1.290533e-03  0.0208928764 -0.001539325
htccamneg        0.247144175   0.510198709  8.815092e-03  0.756238655  0.100213759   0.198978258  0.0304713045  0.4254993380 -1.096948e-02  0.5790457486  0.334138609
iphonecamunc     0.111406373   0.055077108  4.863074e-03  0.070405135  0.173075423   0.033407544  0.0232479942  0.0571069346 -4.955082e-03  0.0769353719  0.058156494
samsungcamunc    0.166404297   0.687072767  4.613737e-02  0.328109674  0.074501519   0.601622057  0.1383622956  0.1232133772  1.295426e-01  0.4190039004  0.270479408
sonycamunc       0.015913421   0.052299042  2.128898e-01  0.019438755  0.025133744   0.078254924  0.5465862762  0.0280088185 -7.133983e-04 -0.0037606539 -0.002346280
htccamunc        0.109169341   0.174449277  9.458497e-03  0.559545613  0.113378069   0.106729580  0.0444070343  0.6012260266 -7.871624e-03  0.2234896449  0.162562910
iphonedispos     0.637549393   0.142340126  4.756778e-03  0.123731381  0.665294228   0.157307185  0.0256367561  0.0916188457  2.029183e-02  0.1658831441  0.180008358
sonydispos       0.006165917   0.037359253  1.698345e-01  0.016800969  0.012257580   0.049024672  0.3239207897  0.0168874444  2.571588e-02  0.0002494674 -0.001865319
nokiadispos      0.031445896   0.065814004 -5.657962e-04  0.030600884  0.018393284   0.118680256 -0.0006845949  0.0144910357  7.952961e-02 -0.0024386792 -0.001521497
htcdispos        0.046924817   0.099978540  4.143993e-03  0.155288679  0.045415445   0.105329698  0.0137490830  0.1351996062 -1.143231e-03  0.1183631135  0.124039759
iphonedisneg     0.641028889   0.195684738  7.243647e-03  0.173327375  0.570373895   0.171119626  0.0248166695  0.0964830230  1.560114e-02  0.2188666272  0.204713023
htcdisneg        0.304483931   0.623159857  2.336729e-02  0.843539217  0.225119925   0.535833491  0.0701301831  0.5507719471  2.663561e-04  0.7062400990  0.645103864
iphonedisunc     0.563691874   0.111738270  2.930222e-03  0.095575003  0.623368394   0.157224860  0.0229792946  0.0928433746  2.409099e-02  0.1330645569  0.172537052
sonydisunc       0.012118871   0.057212597  1.848570e-01  0.028388647  0.024073254   0.089554583  0.4753187875  0.0358684435  1.466589e-02 -0.0027348811 -0.001706298
nokiadisunc      0.010573317   0.079942843 -4.849237e-04  0.016623999  0.008037077   0.143137175 -0.0005867418  0.0073259556  1.020280e-01 -0.0020901045 -0.001304021
htcdisunc        0.202481428   0.405749722  2.009107e-02  0.677341540  0.248031792   0.538900501  0.0824265235  0.7213154358  6.410506e-03  0.4831068089  0.593592947
iphoneperpos     0.793886713   0.212807435  1.861114e-02  0.183633348  0.790788386   0.200057561  0.0359771403  0.1239551674  2.116814e-01  0.2417512206  0.239047349
samsungperpos    0.189373270   0.802176556  2.447781e-02  0.323540972  0.140873872   0.777251273  0.0551106601  0.1892372729  2.742544e-01  0.4443856329  0.427622132
sonyperpos       0.029376968   0.044950438  8.014976e-01  0.010275569  0.018819851   0.050159783  0.7362796362  0.0123505505  5.839549e-03  0.0077913188 -0.002369275
nokiaperpos      0.023987999   0.037649537 -6.908587e-04  0.044519400  0.014458896   0.069745329 -0.0008359165  0.0163745389  8.493374e-02  0.0020368901  0.003132028
htcperpos        0.161017008   0.317036864  5.950830e-03  0.715453696  0.142440001   0.299245835  0.0237154378  0.8498301985 -2.810574e-03  0.3804555947  0.368496557
iphoneperneg     1.000000000   0.317642064  4.805314e-02  0.293941617  0.758909217   0.247797554  0.0437571514  0.1407345338  2.485016e-01  0.3466626958  0.297424663
samsungperneg    0.317642064   1.000000000  3.156094e-02  0.561372641  0.199251539   0.776935211  0.0622433148  0.2713156267  2.028547e-01  0.7584015158  0.641221708
sonyperneg       0.048053145   0.031560944  1.000000e+00  0.008000880  0.015678302   0.030523306  0.6820982219  0.0062945258  2.880773e-05  0.0105302521 -0.001187334
htcperneg        0.293941617   0.561372641  8.000880e-03  1.000000000  0.185666237   0.436953575  0.0223995862  0.6598137354 -3.632114e-03  0.6291204572  0.540113471
iphoneperunc     0.758909217   0.199251539  1.567830e-02  0.185666237  1.000000000   0.257579312  0.0492418692  0.1720801124  1.676109e-01  0.2440512068  0.298692714
samsungperunc    0.247797554   0.776935211  3.052331e-02  0.436953575  0.257579312   1.000000000  0.0849629743  0.3462233283  1.029187e-01  0.6170543151  0.740607313
sonyperunc       0.043757151   0.062243315  6.820982e-01  0.022399586  0.049241869   0.084962974  1.0000000000  0.0297101928 -2.739688e-03 -0.0023026608 -0.001436635
htcperunc        0.140734534   0.271315627  6.294526e-03  0.659813735  0.172080112   0.346223328  0.0297101928  1.0000000000  9.598924e-04  0.3331110571  0.394657103
iosperpos        0.248501573   0.202854749  2.880773e-05 -0.003632114  0.167610887   0.102918715 -0.0027396882  0.0009598924  1.000000e+00 -0.0097593780 -0.006088896
googleperpos     0.346662696   0.758401516  1.053025e-02  0.629120457  0.244051207   0.617054315 -0.0023026608  0.3331110571 -9.759378e-03  1.0000000000  0.887032695
googleperunc     0.297424663   0.641221708 -1.187334e-03  0.540113471  0.298692714   0.740607313 -0.0014366349  0.3946571026 -6.088896e-03  0.8870326954  1.000000000
galaxysentiment -0.008834592  -0.133426703 -2.341915e-02 -0.207231582  0.032683689  -0.054218889 -0.0133558416 -0.1144875788 -1.546352e-02 -0.1328412643 -0.067570988
                galaxysentiment
iphone              0.001486419
samsunggalaxy      -0.345257248
sonyxperia         -0.219816736
nokialumina        -0.052469037
googleandroid      -0.186733253
iphonecampos       -0.031921433
samsungcampos      -0.112302827
sonycampos         -0.076336596
nokiacampos        -0.031771935
htccampos          -0.120156303
iphonecamneg       -0.093805039
samsungcamneg      -0.182639799
sonycamneg         -0.009612904
htccamneg          -0.222750052
iphonecamunc       -0.010329492
samsungcamunc      -0.136028471
sonycamunc         -0.047872336
htccamunc          -0.148868149
iphonedispos        0.009145782
sonydispos         -0.034666620
nokiadispos        -0.024478349
htcdispos          -0.060638338
iphonedisneg       -0.003520125
htcdisneg          -0.190575725
iphonedisunc        0.019354502
sonydisunc         -0.030999910
nokiadisunc        -0.023286467
htcdisunc          -0.132600148
iphoneperpos        0.024619007
samsungperpos      -0.077430604
sonyperpos         -0.023356142
nokiaperpos        -0.035892661
htcperpos          -0.175447029
iphoneperneg       -0.008834592
samsungperneg      -0.133426703
sonyperneg         -0.023419155
htcperneg          -0.207231582
iphoneperunc        0.032683689
samsungperunc      -0.054218889
sonyperunc         -0.013355842
htcperunc          -0.114487579
iosperpos          -0.015463524
googleperpos       -0.132841264
googleperunc       -0.067570988
galaxysentiment     1.000000000

Near Zero Variables

Removing near zero vars:

# Compute near-zero-variance diagnostics for every column of galaxyCOR.
# saveMetrics = TRUE returns a data.frame of per-column metrics (freqRatio,
# percentUnique, zeroVar, nzv) instead of just the offending column indices.
galaxy_nzvMetrics <- nearZeroVar(galaxyCOR, saveMetrics = TRUE)
str(galaxy_nzvMetrics)
'data.frame':   45 obs. of  4 variables:
 $ freqRatio    : num  5.04 14.09 44.11 495.5 61.25 ...
 $ percentUnique: num  0.2091 0.0542 0.0387 0.0232 0.0465 ...
 $ zeroVar      : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
 $ nzv          : logi  FALSE FALSE TRUE TRUE TRUE FALSE ...
# Drop the near-zero-variance columns (uses the remove_nzv() helper defined
# earlier in this file), keeping the result as a new dataset.
galaxyNZV <- remove_nzv(galaxyCOR)
 int [1:34] 3 4 5 7 8 9 10 11 12 13 ...
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   12911 obs. of  11 variables:
 $ iphone         : num  1 1 1 0 1 2 1 1 4 1 ...
 $ samsunggalaxy  : num  0 0 1 0 0 0 0 0 0 0 ...
 $ iphonecampos   : num  0 0 1 0 0 1 0 0 0 0 ...
 $ iphonecamunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedispos   : num  0 1 0 0 0 0 2 0 0 0 ...
 $ iphonedisneg   : num  0 1 0 0 0 0 0 0 0 0 ...
 $ iphonedisunc   : num  0 1 0 0 0 0 0 0 0 0 ...
 $ iphoneperpos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ galaxysentiment: num  5 3 3 0 1 0 3 5 5 5 ...
paste("NZV number of features after cleanup: ", ncol(galaxyNZV))
[1] "NZV number of features after cleanup:  11"

Recursive Feature Elimination (RFE)

# Get results
g_rfe_results

Recursive feature selection

Outer resampling method: Cross-Validated (10 fold, repeated 5 times) 

Resampling performance over subset size:

The top 5 variables (out of 11):
   iphone, googleandroid, iphoneperpos, samsunggalaxy, iphonedispos
# Plot results
plot(g_rfe_results, type=c("g", "o"))

Create a new dataset with the best features found by RFE

str(galaxyRFE)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   12911 obs. of  12 variables:
 $ iphone         : num  1 1 1 0 1 2 1 1 4 1 ...
 $ googleandroid  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperpos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsunggalaxy  : num  0 0 1 0 0 0 0 0 0 0 ...
 $ iphonedispos   : num  0 1 0 0 0 0 2 0 0 0 ...
 $ iphonedisunc   : num  0 1 0 0 0 0 0 0 0 0 ...
 $ iphoneperneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcphone       : num  0 0 0 1 0 0 0 0 0 0 ...
 $ iphoneperunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyxperia     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisneg   : num  0 1 0 0 0 0 0 0 0 0 ...
 $ galaxysentiment: Factor w/ 6 levels "0","1","2","3",..: 6 4 4 1 2 1 4 6 6 6 ...

Models training

Preprocess label and Data Partition

# Work on a copy of the full dataset and convert the outcome to a factor so
# caret treats this as a classification (not regression) problem.
g_df <- galaxyDF
g_df$galaxysentiment <- as.factor(g_df$galaxysentiment)
# Visualise the class distribution of the sentiment labels.
plot_ly(g_df, x= ~g_df$galaxysentiment, type='histogram')

# Stratified 70/30 train/test split. The same seed (90210) is reused for every
# dataset variant below so all splits are drawn over the same rows, making the
# model results comparable across feature-selection strategies.
set.seed(90210)
g_dataPar <- createDataPartition(g_df$galaxysentiment, p = .70, list = FALSE)
g_train_df <- g_df[g_dataPar,]
g_test_df <- g_df[-g_dataPar,]

# galaxyCOR: variant with highly correlated features removed.
# NOTE: the factor conversion mutates galaxyCOR itself (not a copy); later
# training calls pass galaxyCOR directly and rely on this.
galaxyCOR$galaxysentiment <- as.factor(galaxyCOR$galaxysentiment)
set.seed(90210)
g_dataParCOR <- createDataPartition(galaxyCOR$galaxysentiment, p = .70, list = FALSE)
g_train_dfCOR <- galaxyCOR[g_dataParCOR,]
g_test_dfCOR <- galaxyCOR[-g_dataParCOR,]

# galaxyRFE: variant keeping only the features selected by RFE.
galaxyRFE$galaxysentiment <- as.factor(galaxyRFE$galaxysentiment)
set.seed(90210)
g_dataParRFE <- createDataPartition(galaxyRFE$galaxysentiment, p = .70, list = FALSE)
g_train_dfRFE <- galaxyRFE[g_dataParRFE,]
g_test_dfRFE <- galaxyRFE[-g_dataParRFE,]

# galaxyNZV: variant with near-zero-variance features removed.
galaxyNZV$galaxysentiment <- as.factor(galaxyNZV$galaxysentiment)
set.seed(90210)
g_dataParNZV <- createDataPartition(galaxyNZV$galaxysentiment, p = .70, list = FALSE)
g_train_dfNZV <- galaxyNZV[g_dataParNZV,]
g_test_dfNZV <- galaxyNZV[-g_dataParNZV,]

Cross Validation Fit Control

# Cross-validation setup shared by all caret models below:
# 10-fold CV repeated 2 times.
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 2)

# Training functions
g_svm_train <- function(dataF, testing_data) {
  #' Fit an SVM (e1071) predicting galaxysentiment from all other columns,
  #' then score it on a hold-out set.
  #' @param dataF Training data; must contain a `galaxysentiment` column.
  #' @param testing_data Hold-out data used to compute the resample metrics.
  #' @return A list with `model` (the fitted svm) and `post_resample`
  #'   (accuracy/kappa from caret's postResample()).
  library(e1071)
  set.seed(641386945)
  # Fit via the parallel-cluster wrapper; the timing value is kept in a local
  # and otherwise discarded (system.time() does not auto-print inside a
  # function), matching the original behaviour.
  timing <- system.time(
    fitted_svm <- run_in_parallel(svm, galaxysentiment ~ ., data = dataF)
  )
  hold_out_preds <- predict(fitted_svm, testing_data)
  metrics <- postResample(hold_out_preds, testing_data$galaxysentiment)
  list("model" = fitted_svm, "post_resample" = metrics)
}

g_knn_train <- function(dataF, testing_data) {
  #' Fit a k-nearest-neighbours model (kknn::train.kknn) on galaxysentiment
  #' and evaluate it on a hold-out set.
  #' @param dataF Training data; must contain a `galaxysentiment` column.
  #' @param testing_data Hold-out data used to compute the resample metrics.
  #' @return A list with `model` (the fitted kknn model) and `post_resample`
  #'   (accuracy/kappa from caret's postResample()).
  library(kknn)
  set.seed(641386945)
  # Fit via the parallel-cluster wrapper; timing kept locally, not printed.
  timing <- system.time(
    fitted_knn <- run_in_parallel(train.kknn, galaxysentiment ~ ., data = dataF)
  )
  hold_out_preds <- predict(fitted_knn, testing_data)
  metrics <- postResample(hold_out_preds, testing_data$galaxysentiment)
  list("model" = fitted_knn, "post_resample" = metrics)
}

g_caret_train <- function(dataF, testing_data, model_name, fitCtrl) {
  #' Generic caret training wrapper: fit `model_name` on galaxysentiment and
  #' evaluate it on a hold-out set.
  #' @param dataF Training data; must contain a `galaxysentiment` column.
  #' @param testing_data Hold-out data used to compute the resample metrics.
  #' @param model_name caret method string, e.g. "C5.0" or "rf".
  #' @param fitCtrl trainControl object describing the resampling strategy.
  #' @return A list with `model` (the caret train object) and `post_resample`
  #'   (accuracy/kappa from caret's postResample()).
  set.seed(641386945)
  # Fit via the parallel-cluster wrapper; timing kept locally, not printed.
  timing <- system.time(
    fitted_model <- run_in_parallel(
      train, galaxysentiment ~ ., data = dataF,
      method = model_name, trControl = fitCtrl
    )
  )
  hold_out_preds <- predict(fitted_model, testing_data)
  list(
    "model" = fitted_model,
    "post_resample" = postResample(hold_out_preds, testing_data$galaxysentiment)
  )
}

C5.0 Model

##### Decision Tree (C5.0) #####
print("C5.0: Full Dataset")
[1] "C5.0: Full Dataset"
#g_dt_c50 <- g_caret_train(g_df, g_test_df, 'C5.0', fitControl)
g_dt_c50["model"]
$model
C5.0 

12911 samples
   58 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7672526  0.5327786
  rules  FALSE   10      0.7583082  0.5170174
  rules  FALSE   20      0.7583082  0.5170174
  rules   TRUE    1      0.7666716  0.5320908
  rules   TRUE   10      0.7591981  0.5183106
  rules   TRUE   20      0.7591981  0.5183106
  tree   FALSE    1      0.7668272  0.5330249
  tree   FALSE   10      0.7589274  0.5192087
  tree   FALSE   20      0.7589274  0.5192087
  tree    TRUE    1      0.7665169  0.5325158
  tree    TRUE   10      0.7576101  0.5166974
  tree    TRUE   20      0.7576101  0.5166974

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = rules and winnow = FALSE.
g_dt_c50["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7721519 0.5450722 
#Train model with RFE dataset:
print("C5.0: RFE")
[1] "C5.0: RFE"
#g_dt_c50_rfe <- g_caret_train(galaxyRFE, g_test_dfRFE, 'C5.0', fitControl)
g_dt_c50_rfe["model"]
$model
C5.0 

12911 samples
   11 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7607080  0.5174166
  rules  FALSE   10      0.7512204  0.5009562
  rules  FALSE   20      0.7512204  0.5009562
  rules   TRUE    1      0.7603597  0.5168617
  rules   TRUE   10      0.7515303  0.5015714
  rules   TRUE   20      0.7515303  0.5015714
  tree   FALSE    1      0.7609021  0.5201882
  tree   FALSE   10      0.7513757  0.5019511
  tree   FALSE   20      0.7513757  0.5019511
  tree    TRUE    1      0.7607860  0.5201268
  tree    TRUE   10      0.7515693  0.5027408
  tree    TRUE   20      0.7515693  0.5027408

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = tree and winnow = FALSE.
g_dt_c50_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7690519 0.5380287 
#Train model with NZV dataset:
print("C5.0: NZV")
[1] "C5.0: NZV"
#g_dt_c50_nzv <- g_caret_train(galaxyNZV, g_test_dfNZV, 'C5.0', fitControl)
g_dt_c50_nzv["model"]
$model
C5.0 

12911 samples
   10 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7332521  0.4518092
  rules  FALSE   10      0.7309282  0.4461427
  rules  FALSE   20      0.7309282  0.4461427
  rules   TRUE    1      0.7337560  0.4534929
  rules   TRUE   10      0.7296886  0.4444968
  rules   TRUE   20      0.7296886  0.4444968
  tree   FALSE    1      0.7368138  0.4660886
  tree   FALSE   10      0.7275602  0.4437951
  tree   FALSE   20      0.7275602  0.4437951
  tree    TRUE    1      0.7370465  0.4661838
  tree    TRUE   10      0.7268233  0.4412637
  tree    TRUE   20      0.7268233  0.4412637

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = tree and winnow = TRUE.
g_dt_c50_nzv["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7450271 0.4832191 
#Train model with COR dataset:
print("C5.0: COR")
[1] "C5.0: COR"
#g_dt_c50_cor <- g_caret_train(galaxyCOR, g_test_dfCOR, 'C5.0', fitControl)
g_dt_c50_cor["model"]
$model
C5.0 

12911 samples
   44 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  model  winnow  trials  Accuracy   Kappa    
  rules  FALSE    1      0.7670205  0.5324788
  rules  FALSE   10      0.7597015  0.5193774
  rules  FALSE   20      0.7597015  0.5193774
  rules   TRUE    1      0.7664006  0.5316565
  rules   TRUE   10      0.7582301  0.5166940
  rules   TRUE   20      0.7582301  0.5166940
  tree   FALSE    1      0.7668659  0.5331394
  tree   FALSE   10      0.7583455  0.5181921
  tree   FALSE   20      0.7583455  0.5181921
  tree    TRUE    1      0.7665174  0.5327633
  tree    TRUE   10      0.7576482  0.5178329
  tree    TRUE   20      0.7576482  0.5178329

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were trials = 1, model = rules and winnow = FALSE.
g_dt_c50_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7718936 0.5447499 

Random Forest Model

print("Random Forest: Full Dataset")
[1] "Random Forest: Full Dataset"
#g_rf <- g_caret_train(g_df, g_test_df, 'rf', fitControl)
g_rf["model"]
$model
Random Forest 

12911 samples
   58 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7069549  0.3603106
  30    0.7662466  0.5345938
  58    0.7585009  0.5237583

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 30.
g_rf["post_resample"]
$post_resample
 Accuracy     Kappa 
0.8041850 0.6152908 
print("Random Forest: RFE")
[1] "Random Forest: RFE"
#g_rf_rfe <- g_caret_train(galaxyRFE, g_test_dfRFE, 'rf', fitControl)
g_rf_rfe["model"]
$model
Random Forest 

12911 samples
   11 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7521891  0.4940431
   6    0.7610574  0.5249298
  11    0.7532750  0.5140141

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 6.
g_rf_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7928184 0.5923106 
print("Random Forest: NZV")
[1] "Random Forest: NZV"
#g_rf_nzv <- g_caret_train(galaxyNZV, g_test_dfNZV, 'rf', fitControl)
g_rf_nzv["model"]
$model
Random Forest 

12911 samples
   10 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7403773  0.4697407
   6    0.7364660  0.4690349
  10    0.7310058  0.4627170

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 2.
g_rf_nzv["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7587187 0.5118240 
print("Random Forest: COR")
[1] "Random Forest: COR"
#g_rf_cor <- g_caret_train(galaxyCOR, g_test_dfCOR, 'rf', fitControl)
g_rf_cor["model"]
$model
Random Forest 

12911 samples
   44 predictor
    6 classes: '0', '1', '2', '3', '4', '5' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 2 times) 
Summary of sample sizes: 11620, 11621, 11621, 11619, 11621, 11619, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7015336  0.3422305
  23    0.7663238  0.5341970
  44    0.7587338  0.5241327

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 23.
g_rf_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.8021183 0.6114243 

Support Vector Machine (SVM) Model

print("SVM: Full Dataset")
[1] "SVM: Full Dataset"
#g_svm_train_full <- g_svm_train(g_df, g_test_df)
g_svm_train_full["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.01724138 

Number of Support Vectors:  6386
g_svm_train_full["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7124774 0.3907256 
print("SVM: RFE")
[1] "SVM: RFE"
#g_svm_train_rfe <- g_svm_train(galaxyRFE, g_test_dfRFE)
g_svm_train_rfe["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.09090909 

Number of Support Vectors:  6130
g_svm_train_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7189357 0.4154935 
print("SVM: NZV")
[1] "SVM: NZV"
#g_svm_train_nzv <- g_svm_train(galaxyNZV, g_test_dfNZV)
g_svm_train_nzv["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.1 

Number of Support Vectors:  6822
g_svm_train_nzv["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7199690 0.4126435 
print("SVM: COR")
[1] "SVM: COR"
#g_svm_train_cor <- g_svm_train(galaxyCOR, g_test_dfCOR)
g_svm_train_cor["model"]
$model

Call:
svm(formula = ..1, data = ..2)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 
      gamma:  0.02272727 

Number of Support Vectors:  6313
g_svm_train_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7106691 0.3848878 

K-nearest Neighbors (KNN) Model

print("KNN: Full Dataset")
[1] "KNN: Full Dataset"
#g_knn_train_full <- g_knn_train(g_df, g_test_df)
g_knn_train_full["model"]
$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.2450623
Best kernel: optimal
Best k: 11
g_knn_train_full["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7602687 0.5384295 
print("KNN: RFE")
[1] "KNN: RFE"
#g_knn_train_rfe <- g_knn_train(galaxyRFE, g_test_dfRFE)
g_knn_train_rfe["model"]
$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.2442104
Best kernel: optimal
Best k: 11
g_knn_train_rfe["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7716352 0.5469901 
print("KNN: NZV")
[1] "KNN: NZV"
#g_knn_train_nzv <- g_knn_train(galaxyNZV, g_test_dfNZV)
g_knn_train_nzv["model"]
$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.2652002
Best kernel: optimal
Best k: 11
g_knn_train_nzv["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7507104 0.4979615 
print("KNN: COR")
[1] "KNN: COR"
#g_knn_train_cor <- g_knn_train(galaxyCOR, g_test_dfCOR)
g_knn_train_cor["model"]
$model

Call:
FUN(formula = ..1, data = ..2)

Type of response variable: nominal
Minimal misclassification: 0.2442104
Best kernel: optimal
Best k: 11
g_knn_train_cor["post_resample"]
$post_resample
 Accuracy     Kappa 
0.7739602 0.5557506 

Models Performance

Grouped bar chart to evaluate model performance.

# Create confusion matrices on the hold-out set for each selected model.
# BUG FIX: each training helper returns list("model" = ..., ...), so
# `x["model"]` yields a one-element *list*, not the model itself — calling
# predict() on it (and then `$model` on the predictions) is wrong. Extract the
# model with `[[ ]]` before predicting, as the KNN line already did.
g_cm_dt <- confusionMatrix(predict(g_dt_c50[["model"]], g_test_df), g_test_df$galaxysentiment)
plot_confusion_matrix(g_cm_dt, "C5.0")


# NOTE(review): g_svm_train_nzv was trained on the NZV feature subset but is
# scored here against g_test_df (full feature set). predict() only reads the
# training features, yet g_test_dfNZV would be the consistent choice — confirm.
g_cmsvm <- confusionMatrix(predict(g_svm_train_nzv[["model"]], g_test_df), g_test_df$galaxysentiment)
plot_confusion_matrix(g_cmsvm, "SVM")


g_cmRF <- confusionMatrix(predict(g_rf[["model"]], g_test_df), g_test_df$galaxysentiment)
plot_confusion_matrix(g_cmRF, "Random Forest")


g_cmknn <- confusionMatrix(predict(g_knn_train_cor[["model"]], g_test_df), g_test_df$galaxysentiment)
plot_confusion_matrix(g_cmknn, "KNN")


print("C5.0 detail")
[1] "C5.0 detail"
g_cm_dt
Confusion Matrix and Statistics

          Reference
Prediction    0    1    2    3    4    5
         0  348    1    1    6    2   27
         1    0    0    0    0    0    0
         2    2    0   11    0    1    2
         3    2    2    0  224    5   23
         4    4    1    3    2  139   18
         5  152  110  120  120  278 2267

Overall Statistics
                                          
               Accuracy : 0.7722          
                 95% CI : (0.7586, 0.7853)
    No Information Rate : 0.6037          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5451          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity           0.68504  0.00000 0.081481  0.63636  0.32706   0.9700
Specificity           0.98900  1.00000 0.998662  0.99091  0.99187   0.4915
Pos Pred Value        0.90390      NaN 0.687500  0.87500  0.83234   0.7440
Neg Pred Value        0.95410  0.97055 0.967834  0.96459  0.92279   0.9150
Prevalence            0.13123  0.02945 0.034875  0.09093  0.10979   0.6037
Detection Rate        0.08990  0.00000 0.002842  0.05787  0.03591   0.5856
Detection Prevalence  0.09946  0.00000 0.004133  0.06613  0.04314   0.7871
Balanced Accuracy     0.83702  0.50000 0.540072  0.81364  0.65947   0.7308
print("\n-------------------------------------------------------------------------------")
[1] "\n-------------------------------------------------------------------------------"
print("RF detail")
[1] "RF detail"
g_cmRF
Confusion Matrix and Statistics

          Reference
Prediction    0    1    2    3    4    5
         0  373    1    1    4    2   16
         1    0   10    0    0    0    0
         2    3    0   25    0    2    2
         3    1    2    0  239    4   19
         4    1    1    0    2  171    5
         5  130  100  109  107  246 2295

Overall Statistics
                                          
               Accuracy : 0.8042          
                 95% CI : (0.7913, 0.8166)
    No Information Rate : 0.6037          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.6153          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity           0.73425 0.087719 0.185185  0.67898  0.40235   0.9820
Specificity           0.99286 1.000000 0.998126  0.99261  0.99739   0.5489
Pos Pred Value        0.93955 1.000000 0.781250  0.90189  0.95000   0.7683
Neg Pred Value        0.96114 0.973064 0.971347  0.96866  0.93118   0.9525
Prevalence            0.13123 0.029450 0.034875  0.09093  0.10979   0.6037
Detection Rate        0.09636 0.002583 0.006458  0.06174  0.04417   0.5929
Detection Prevalence  0.10256 0.002583 0.008267  0.06846  0.04650   0.7716
Balanced Accuracy     0.86356 0.543860 0.591656  0.83579  0.69987   0.7655
print("\n-------------------------------------------------------------------------------")
[1] "\n-------------------------------------------------------------------------------"
print("SVM detail")
[1] "SVM detail"
g_cmsvm
Confusion Matrix and Statistics

          Reference
Prediction    0    1    2    3    4    5
         0  342    2   13   61    7   31
         1    0    1    0    0    0    0
         2    0    0    0    0    0    0
         3    0    0    1   43    2   17
         4    1    1    0    2  118    6
         5  165  110  121  246  298 2283

Overall Statistics
                                          
               Accuracy : 0.72            
                 95% CI : (0.7055, 0.7341)
    No Information Rate : 0.6037          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.4126          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 0  Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity           0.67323 0.0087719  0.00000  0.12216  0.27765   0.9769
Specificity           0.96610 1.0000000  1.00000  0.99432  0.99710   0.3872
Pos Pred Value        0.75000 1.0000000      NaN  0.68254  0.92188   0.7083
Neg Pred Value        0.95139 0.9708010  0.96513  0.91886  0.91798   0.9167
Prevalence            0.13123 0.0294498  0.03487  0.09093  0.10979   0.6037
Detection Rate        0.08835 0.0002583  0.00000  0.01111  0.03048   0.5898
Detection Prevalence  0.11780 0.0002583  0.00000  0.01627  0.03307   0.8326
Balanced Accuracy     0.81967 0.5043860  0.50000  0.55824  0.63737   0.6821
print("\n-------------------------------------------------------------------------------")
[1] "\n-------------------------------------------------------------------------------"
print("KNN detail")
[1] "KNN detail"
g_cmknn
Confusion Matrix and Statistics

          Reference
Prediction    0    1    2    3    4    5
         0  355    3    1    9    8   35
         1    0    4    1    0    1    5
         2    2    1   13    0    2    4
         3    5    3    3  230    6   32
         4    2    1    4    1  143   10
         5  144  102  113  112  265 2251

Overall Statistics
                                          
               Accuracy : 0.774           
                 95% CI : (0.7605, 0.7871)
    No Information Rate : 0.6037          
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5558          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       

Statistics by Class:

                     Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity           0.69882 0.035088 0.096296  0.65341  0.33647   0.9632
Specificity           0.98335 0.998137 0.997591  0.98608  0.99478   0.5202
Pos Pred Value        0.86375 0.363636 0.590909  0.82437  0.88820   0.7536
Neg Pred Value        0.95578 0.971503 0.968303  0.96604  0.92399   0.9027
Prevalence            0.13123 0.029450 0.034875  0.09093  0.10979   0.6037
Detection Rate        0.09171 0.001033 0.003358  0.05942  0.03694   0.5815
Detection Prevalence  0.10617 0.002842 0.005683  0.07207  0.04159   0.7716
Balanced Accuracy     0.84108 0.516612 0.546944  0.81974  0.66562   0.7417

Model Selection

The accuracy and kappa shown by the **Random Forest model**, using all the features from the dataset, were the best.

Large dataset prediction

Preprocessing the large dataset

# Load the large unlabelled matrix on which sentiment will be predicted.
g_large_df <- read_csv("big_matrix.csv")
Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
# Drop the row identifier column; it is not a predictor.
g_large_df$id <- NULL

# No features need to be removed
# Review the outcome of the load/cleanup
str(g_large_df)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame':    59139 obs. of  58 variables:
 $ iphone       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsunggalaxy: num  0 0 0 0 0 1 0 0 0 0 ...
 $ sonyxperia   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokialumina  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcphone     : num  0 0 0 0 0 0 0 0 2 0 ...
 $ ios          : num  0 0 1 1 0 0 0 0 0 0 ...
 $ googleandroid: num  7 0 0 0 0 0 0 0 0 0 ...
 $ iphonecampos : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungcampos: num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonycampos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacampos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccampos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamneg : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungcamneg: num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonycamneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacamneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccamneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonecamunc : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungcamunc: num  0 0 0 0 0 1 0 0 0 0 ...
 $ sonycamunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiacamunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htccamunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedispos : num  0 0 0 0 1 0 0 0 0 0 ...
 $ samsungdispos: num  0 0 0 0 0 2 0 0 0 0 ...
 $ sonydispos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadispos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdispos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisneg : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungdisneg: num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydisneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadisneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphonedisunc : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungdisunc: num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonydisunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiadisunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcdisunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperpos : num  0 0 0 0 9 0 0 0 0 0 ...
 $ samsungperpos: num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperpos   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperpos  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperpos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperneg : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungperneg: num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperneg   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperneg  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iphoneperunc : num  0 0 0 0 0 0 0 0 0 0 ...
 $ samsungperunc: num  0 0 0 0 0 0 0 0 0 0 ...
 $ sonyperunc   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ nokiaperunc  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ htcperunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperpos    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperpos : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperneg    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperneg : num  0 0 0 0 0 0 0 0 0 0 ...
 $ iosperunc    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ googleperunc : num  0 0 0 0 0 0 0 0 0 1 ...
 - attr(*, "spec")=
  .. cols(
  ..   id = col_double(),
  ..   iphone = col_double(),
  ..   samsunggalaxy = col_double(),
  ..   sonyxperia = col_double(),
  ..   nokialumina = col_double(),
  ..   htcphone = col_double(),
  ..   ios = col_double(),
  ..   googleandroid = col_double(),
  ..   iphonecampos = col_double(),
  ..   samsungcampos = col_double(),
  ..   sonycampos = col_double(),
  ..   nokiacampos = col_double(),
  ..   htccampos = col_double(),
  ..   iphonecamneg = col_double(),
  ..   samsungcamneg = col_double(),
  ..   sonycamneg = col_double(),
  ..   nokiacamneg = col_double(),
  ..   htccamneg = col_double(),
  ..   iphonecamunc = col_double(),
  ..   samsungcamunc = col_double(),
  ..   sonycamunc = col_double(),
  ..   nokiacamunc = col_double(),
  ..   htccamunc = col_double(),
  ..   iphonedispos = col_double(),
  ..   samsungdispos = col_double(),
  ..   sonydispos = col_double(),
  ..   nokiadispos = col_double(),
  ..   htcdispos = col_double(),
  ..   iphonedisneg = col_double(),
  ..   samsungdisneg = col_double(),
  ..   sonydisneg = col_double(),
  ..   nokiadisneg = col_double(),
  ..   htcdisneg = col_double(),
  ..   iphonedisunc = col_double(),
  ..   samsungdisunc = col_double(),
  ..   sonydisunc = col_double(),
  ..   nokiadisunc = col_double(),
  ..   htcdisunc = col_double(),
  ..   iphoneperpos = col_double(),
  ..   samsungperpos = col_double(),
  ..   sonyperpos = col_double(),
  ..   nokiaperpos = col_double(),
  ..   htcperpos = col_double(),
  ..   iphoneperneg = col_double(),
  ..   samsungperneg = col_double(),
  ..   sonyperneg = col_double(),
  ..   nokiaperneg = col_double(),
  ..   htcperneg = col_double(),
  ..   iphoneperunc = col_double(),
  ..   samsungperunc = col_double(),
  ..   sonyperunc = col_double(),
  ..   nokiaperunc = col_double(),
  ..   htcperunc = col_double(),
  ..   iosperpos = col_double(),
  ..   googleperpos = col_double(),
  ..   iosperneg = col_double(),
  ..   googleperneg = col_double(),
  ..   iosperunc = col_double(),
  ..   googleperunc = col_double()
  .. )

Apply Model on the large dataset

g_large_df$galaxysentiment<- predict(g_rf["model"]$model, g_large_df)
head(g_large_df$galaxysentiment, 5)
[1] 2 0 0 0 0
Levels: 0 1 2 3 4 5
summary(g_large_df$galaxysentiment)
    0     1     2     3     4     5 
39541     0  3757  1661    24 14156 

Galaxy Sentiments Results

galaxysentiment <- summary(g_large_df$galaxysentiment)
galaxysentiment_df <- data.frame("Categorie"=c("Very Negative", "Negative", "Somewhat Negative", "Somewhat Positive", "Positive", "Very Positive"), galaxysentiment)
galaxy_sent_data <- galaxysentiment_df[,c('Categorie', 'galaxysentiment')]

galaxy_pie <- plot_ly(galaxy_sent_data, labels = ~Categorie, values = ~galaxysentiment, type = 'pie') %>%
  layout(title = 'Galaxy Sentiment - Nov 2019',
         xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
         yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))

galaxy_pie

NA
p <- plot_ly() %>%
  add_pie(data=galaxy_sent_data, labels = ~Categorie, values = ~galaxysentiment,
          name = "Galaxy", domain = list(x = c(0, 0.4), y = c(0.4, 1))) %>%
  add_pie(iphone_sent_data, labels = ~Categorie, values = ~iphonesentiment,
          name = "iPhone", domain = list(x = c(0.6, 1), y = c(0.4, 1))) %>%
  layout(title = "Galaxy (left) vs iPhone (right) Sentiments ", showlegend = F,
         xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = TRUE),
         yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = TRUE))

p

–EOF–

---
title: "Course 4/Task 3: Develop models to predict sentiment"
author: "Esteban Villalobos Gomez"
date: "January $23_{rd}$, 2020"
output:
  html_notebook:
    highlight: tango
    theme: simplex
    toc: yes
    toc_float: true
  pdf_document:
    df_print: kable
    toc: yes
  html_document:
    df_print: paged
    toc: yes
  word_document:
    toc: yes
subtitle: "XTOL Data Analytics and Big Data program"
---

```{r include = FALSE}
library(doParallel)
library(plotly)
library(corrplot)
library(RColorBrewer)
library(caret)
library(dplyr)
library(readr)
library(ggplot2)
options(max.print=1000000)
```


# Common function definitions:
```{r}
plot_correlation <- function(dataset) {
  #' Compute the pairwise correlation matrix of all columns in `dataset`
  #' and render it as a heat map.
  #' @param dataset Data.frame to analyse
  #' @return The correlation matrix (as produced by cor())
  corr_data <- cor(dataset)

  # Diverging 8-colour palette; coefficients shown as percentages.
  heat_colors <- brewer.pal(n = 8, name = "RdYlBu")
  corrplot(corr_data,
           type = "full",
           order = "original",
           tl.cex = .6,
           addCoefasPercent = TRUE,
           col = heat_colors)
  return(corr_data)
}

# General EDA
describe_df <- function(name, df) {
  #' Print basic exploratory information for a data frame: a header,
  #' its structure, summary statistics, and the count of NA values.
  #' @param name Label used in the printed header.
  #' @param df Data.frame to describe.
  #' @return The NA-count message (character), auto-printed when the
  #'   call is the last expression of a notebook chunk.
  # print() is required here: inside a function, bare paste()/summary()
  # calls are NOT auto-printed, so the original silently discarded them.
  print(paste("EDA for ", name, ":"))
  str(df)            # str() prints as a side effect
  print(summary(df))
  paste("Number of NA values: ", sum(is.na(df)))
}

#### Preprocessing functions
remove_highly_correlated_features <- function(df) {
  #' Drop columns that are highly correlated (cutoff 0.9) with another
  #' column, using caret::findCorrelation on the full correlation matrix.
  #' @param df Numeric data.frame.
  #' @return df with the flagged columns removed.
  corr_data <- cor(df)
  # `exact` is a LOGICAL flag in caret::findCorrelation; the original
  # passed ncol(corr_data), which only behaved like TRUE because any
  # non-zero number is truthy in an if() condition.
  high_corr_cols <- findCorrelation(corr_data, cutoff = 0.9, verbose = FALSE,
                                    names = FALSE, exact = TRUE)
  df[high_corr_cols] <- NULL
  return(df)
}

remove_nzv <- function(df) {
  #' Remove near-zero-variance features from a data frame.
  #' @param df Data.frame to filter.
  #' @return df without near-zero-variance columns (df unchanged if none).
  # nearZeroVar() with saveMetrics = FALSE returns an integer vector of
  # column positions to drop.
  nzv <- nearZeroVar(df, saveMetrics = FALSE) 
  str(nzv)

  # Guard the empty case: df[, -integer(0)] selects ZERO columns, so the
  # original would silently return an empty data frame when nothing
  # needed removing.
  if (length(nzv) == 0) {
    return(df)
  }

  # create a new data set and remove near zero variance features
  df_new <- df[,-nzv]
  str(df_new)
  return(df_new)
}


#### Execute in parallel
run_in_parallel <- function(FUN, ...) {
  #' Run FUN(...) with a doParallel cluster registered, then tear the
  #' cluster down again.
  #' @param FUN Function to execute (e.g. caret::train, caret::rfe).
  #' @param ... Arguments forwarded to FUN.
  #' @return Whatever FUN returns.
  # Find how many cores are on this machine.
  num_cores <- detectCores()

  # Leave two cores for the OS/other processes, but never request fewer
  # than one worker: the original num_cores - 2 errors on 1- or 2-core
  # machines.
  cl <- makeCluster(max(1L, num_cores - 2L))

  # Guarantee cleanup even if FUN throws; otherwise a failed training run
  # would leak the worker processes.
  on.exit(stopCluster(cl), add = TRUE)

  # Register the cluster with the foreach backend.
  registerDoParallel(cl)

  result <- FUN(...)
  return(result)
}

svm_train <- function(dataF, testing_data) {
  #' Fit an SVM (e1071) predicting iphonesentiment and evaluate it on a
  #' hold-out set.
  #' @param dataF Training data.frame containing `iphonesentiment`.
  #' @param testing_data Hold-out data.frame for evaluation.
  #' @return list with the fitted `model` and its `post_resample` metrics.
  library(e1071)
  set.seed(641386945)
  system.time(
    fitted_model <- run_in_parallel(svm, iphonesentiment ~ ., data = dataF)
  )
  holdout_preds <- predict(fitted_model, testing_data)
  holdout_metrics <- postResample(holdout_preds, testing_data$iphonesentiment)
  return(list("model" = fitted_model, "post_resample" = holdout_metrics))
}

knn_train <- function(dataF, testing_data) {
  #' Fit a k-nearest-neighbours model (kknn) predicting iphonesentiment
  #' and evaluate it on a hold-out set.
  #' @param dataF Training data.frame containing `iphonesentiment`.
  #' @param testing_data Hold-out data.frame for evaluation.
  #' @return list with the fitted `model` and its `post_resample` metrics.
  library(kknn)
  set.seed(641386945)
  system.time(
    fitted_model <- run_in_parallel(train.kknn, iphonesentiment ~ ., data = dataF)
  )
  holdout_preds <- predict(fitted_model, testing_data)
  holdout_metrics <- postResample(holdout_preds, testing_data$iphonesentiment)
  return(list("model" = fitted_model, "post_resample" = holdout_metrics))
}

caret_train <- function(dataF, testing_data, model_name, fitCtrl) {
  #' Train a caret model of the given method for iphonesentiment and
  #' evaluate it on a hold-out set.
  #' @param dataF Training data.frame containing `iphonesentiment`.
  #' @param testing_data Hold-out data.frame for evaluation.
  #' @param model_name caret method string (e.g. 'C5.0', 'rf').
  #' @param fitCtrl trainControl object (cross-validation settings).
  #' @return list with the fitted `model` and its `post_resample` metrics.
  set.seed(641386945)
  system.time(
    fitted_model <- run_in_parallel(train, iphonesentiment ~ ., data = dataF,
                                    method = model_name, trControl = fitCtrl)
  )
  holdout_preds <- predict(fitted_model, testing_data)
  holdout_metrics <- postResample(holdout_preds, testing_data$iphonesentiment)
  return(list("model" = fitted_model, "post_resample" = holdout_metrics))
}

plot_confusion_matrix <- function(conf_matrix, model_name) {
  #' Render a caret confusion matrix as a coloured ggplot tile chart.
  #' Correct cells (prediction == reference) are green, errors red; tile
  #' alpha encodes the proportion within each reference class.
  #' @param conf_matrix Result of caret::confusionMatrix().
  #' @param model_name Title prefix for the plot.
  # Avoid shadowing base::table by giving the frequency frame its own name.
  cm_freq <- data.frame(conf_matrix$table)

  plot_freq <- cm_freq %>%
    mutate(goodbad = ifelse(Prediction == Reference, "good", "bad")) %>%
    group_by(Reference) %>%
    mutate(prop = Freq / sum(Freq))

  # fill alpha relative to sensitivity/specificity by proportional outcomes
  # within reference groups, so diagonal cells stand out regardless of
  # class frequency.
  ggplot(data = plot_freq,
         mapping = aes(x = Reference, y = Prediction,
                       fill = goodbad, alpha = prop)) +
    geom_tile() +
    geom_text(aes(label = Freq), vjust = .5, fontface = "bold", alpha = 1) +
    scale_fill_manual(values = c(good = "green", bad = "red")) +
    theme_bw() +
    xlim(rev(levels(cm_freq$Reference))) +
    ggtitle(paste(model_name, "Confusion Matrix"))
}
```

# iPhone analysis

Load training datasets for iPhone labeled sentiment.
```{r echo=FALSE}
iphoneDF <- read_csv("iphone_smallmatrix_labeled_8d.csv")
```

Explore structure and descriptive statistics from the training datasets
```{r echo=FALSE}
describe_df("iPhone", iphoneDF)
```
## Labeled sentiment distribution.
```{r}
plot_ly(iphoneDF, x= ~iphoneDF$iphonesentiment, type='histogram')
```
## Feature selection methods
### Features Correlation
Explore correlation between all variables:
```{r echo=FALSE}
# create a new data set and remove features highly correlated with the dependant 
iphoneCOR <- remove_highly_correlated_features(iphoneDF)
paste("Number of original features: ", ncol(iphoneDF))
paste("Number of features after cleanup: ", ncol(iphoneCOR))
plot_correlation(iphoneCOR)
```
### Near Zero Variables
Removing near zero vars:
```{r}
nzvMetrics <- nearZeroVar(iphoneCOR, saveMetrics = TRUE)
str(nzvMetrics)

iphoneNZV <- remove_nzv(iphoneCOR)

paste("NZV number of features after cleanup: ", ncol(iphoneNZV))
```
### Recursive Feature Elimination (RFE)
```{r}
set.seed(9874568)
# RFE with random forests is expensive, so fit it on a 1000-row random
# sample of the data rather than all 12k+ rows.
iphone_sample <- iphoneDF[sample(1:nrow(iphoneDF), 1000, replace=FALSE),]

# Set up rfeControl with randomforest, repeated cross validation and no updates
ctrl <- rfeControl(functions = rfFuncs,
                   method = "repeatedcv",
                   repeats = 5,
                   verbose = FALSE)

# Use rfe and omit the response variable (attribute 59 iphonesentiment)
# sizes = 1:58 asks RFE to evaluate every candidate subset size.
rfe_results <- run_in_parallel(rfe, iphone_sample[,1:58],
                              iphone_sample$iphonesentiment,
                              sizes=(1:58), rfeControl=ctrl)

# Get results
rfe_results

# Plot results
plot(rfe_results, type=c("g", "o"))
```
Create a new dataset with the best features found by RFE
```{r}
# create new data set with rfe recommended features
iphoneRFE <- iphoneDF[,predictors(rfe_results)]

# add the dependent variable to iphoneRFE
iphoneRFE$iphonesentiment <- iphoneDF$iphonesentiment

# review outcome
str(iphoneRFE)
```
## Models training
### Preprocess label and Data Partition
```{r}
# Classification setup: convert the numeric sentiment label to a factor
# and create a stratified 70/30 train/test split for each candidate
# feature set. Reseeding with 90210 before every createDataPartition()
# call makes all four splits select the same rows.
df <- iphoneDF
df$iphonesentiment <- as.factor(df$iphonesentiment)
plot_ly(df, x= ~df$iphonesentiment, type='histogram')

set.seed(90210)
dataPar <- createDataPartition(df$iphonesentiment, p = .70, list = FALSE)
train_df <- df[dataPar,]
test_df <- df[-dataPar,]

#iphoneCOR
iphoneCOR$iphonesentiment <- as.factor(iphoneCOR$iphonesentiment)
set.seed(90210)
dataParCOR <- createDataPartition(iphoneCOR$iphonesentiment, p = .70, list = FALSE)
train_dfCOR <- iphoneCOR[dataParCOR,]
test_dfCOR <- iphoneCOR[-dataParCOR,]

#iphoneRFE
iphoneRFE$iphonesentiment <- as.factor(iphoneRFE$iphonesentiment)
set.seed(90210)
dataParRFE <- createDataPartition(iphoneRFE$iphonesentiment, p = .70, list = FALSE)
train_dfRFE <- iphoneRFE[dataParRFE,]
test_dfRFE <- iphoneRFE[-dataParRFE,]

#iphoneNZV
iphoneNZV$iphonesentiment <- as.factor(iphoneNZV$iphonesentiment)
set.seed(90210)
dataParNZV <- createDataPartition(iphoneNZV$iphonesentiment, p = .70, list = FALSE)
train_dfNZV <- iphoneNZV[dataParNZV,]
test_dfNZV <- iphoneNZV[-dataParNZV,]

```
## Cross Validation Fit Control
```{r}
# cross validation 
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 2)
```

### C5.0 Model
```{r}
##### Decision Tree (C5.0) #####
print("C5.0: Full Dataset")
# Train on the 70% training split; the original passed `df` (all rows),
# which leaks the held-out test rows into training and inflates metrics.
dt_c50 <- caret_train(train_df, test_df, 'C5.0', fitControl)
dt_c50["model"]
dt_c50["post_resample"]
```
Train model with RFE dataset:
```{r}
print("C5.0: RFE")
dt_c50_rfe <- caret_train(iphoneRFE, test_dfRFE, 'C5.0', fitControl)
dt_c50_rfe["model"]
dt_c50_rfe["post_resample"]
```
Train model with NZV dataset:
```{r}
print("C5.0: NZV")
dt_c50_nzv <- caret_train(iphoneNZV, test_dfNZV, 'C5.0', fitControl)
dt_c50_nzv["model"]
dt_c50_nzv["post_resample"]
```
Train model with COR dataset:
```{r}
print("C5.0: COR")
dt_c50_cor <- caret_train(iphoneCOR, test_dfCOR, 'C5.0', fitControl)
dt_c50_cor["model"]
dt_c50_cor["post_resample"]
```

### Random Forest Model
```{r}
print("Random Forest: Full Dataset")
rf <- caret_train(df, test_df, 'rf', fitControl)
rf["model"]
rf["post_resample"]

print("Random Forest: RFE")
rf_rfe <- caret_train(iphoneRFE, test_dfRFE, 'rf', fitControl)
rf_rfe["model"]
rf_rfe["post_resample"]

print("Random Forest: NZV")
rf_nzv <- caret_train(iphoneNZV, test_dfNZV, 'rf', fitControl)
rf_nzv["model"]
rf_nzv["post_resample"]

print("Random Forest: COR")
rf_cor <- caret_train(iphoneCOR, test_dfCOR, 'rf', fitControl)
rf_cor["model"]
rf_cor["post_resample"]
```


### Support Vector Machine (SVM) Model
```{r}
print("SVM: Full Dataset")
svm_train_full <- svm_train(df, test_df)
svm_train_full["model"]
svm_train_full["post_resample"]

print("SVM: RFE")
svm_train_rfe <- svm_train(iphoneRFE, test_dfRFE)
svm_train_rfe["model"]
svm_train_rfe["post_resample"]

print("SVM: NZV")
svm_train_nzv <- svm_train(iphoneNZV, test_dfNZV)
svm_train_nzv["model"]
svm_train_nzv["post_resample"]

print("SVM: COR")
svm_train_cor <- svm_train(iphoneCOR, test_dfCOR)
svm_train_cor["model"]
svm_train_cor["post_resample"]

```
## K-nearest Neighbors (KNN) Model
```{r}
print("KNN: Full Dataset")
knn_train_full <- knn_train(df, test_df)
knn_train_full["model"]$model
knn_train_full["post_resample"]

print("KNN: RFE")
knn_train_rfe <- knn_train(iphoneRFE, test_dfRFE)
knn_train_rfe["model"]$model
knn_train_rfe["post_resample"]

print("KNN: NZV")
knn_train_nzv <- knn_train(iphoneNZV, test_dfNZV)
knn_train_nzv["model"]$model
knn_train_nzv["post_resample"]

print("KNN: COR")
knn_train_cor <- knn_train(iphoneCOR, test_dfCOR)
knn_train_cor["model"]$model
knn_train_cor["post_resample"]
```
## Models Performance
Grouped bar chart to evaluate model performance
```{r echo=FALSE}
# Compare Accuracy/Kappa across the four algorithms. The C5.0, RF and SVM
# bars use the RFE-trained variants, while KNN uses the full-data variant —
# NOTE(review): presumably each model's chosen candidate; confirm KNN was
# not meant to be knn_train_rfe for a like-for-like comparison.
model_name <- c(rep("C5.0" , 2) , rep("RF" , 2) , rep("SVM" , 2) , rep("KNN" , 2) )
metric <- rep(c("Accuracy" , "Kappa") , 4)
value <- c(dt_c50_rfe["post_resample"]$post_resample, rf_rfe["post_resample"]$post_resample, svm_train_rfe["post_resample"]$post_resample, knn_train_full["post_resample"]$post_resample)
plot_data <- data.frame(model_name,metric,value)

# Side-by-side bars: one pair (Accuracy, Kappa) per model.
ggplot(plot_data, aes(fill=metric, y=value, x=model_name)) + 
  geom_bar(position="dodge", stat="identity") +
  xlab("Model") + 
  ggtitle("iPhone Models Comparison")

```

### Confusion Matrix comparison

**Note:** The KNN model showed such poor performance on the Accuracy and Kappa metrics that it was discarded from the analysis.

```{r}
# Creating confusion matrices for the RFE-trained models.
# The original called predict(x["model"], ...)$model: x["model"] is a
# one-element LIST (predict() cannot dispatch on it), and `$model` on the
# resulting factor is invalid for atomic vectors — use x[["model"]] and
# drop the trailing `$model`. Also evaluate on the matching RFE test
# split (same rows as test_df: both partitions share seed 90210).
iphone_cm_dt <- confusionMatrix(predict(dt_c50_rfe[["model"]], test_dfRFE), test_dfRFE$iphonesentiment)
plot_confusion_matrix(iphone_cm_dt, "C5.0")

iphone_cmsvm <- confusionMatrix(predict(svm_train_rfe[["model"]], test_dfRFE), test_dfRFE$iphonesentiment) 
plot_confusion_matrix(iphone_cmsvm, "SVM")

iphone_cmRF <- confusionMatrix(predict(rf_rfe[["model"]], test_dfRFE), test_dfRFE$iphonesentiment) 
plot_confusion_matrix(iphone_cmRF, "Random Forest")

print("C5.0 detail")
iphone_cm_dt
print("\n-------------------------------------------------------------------------------")
print("RF detail")
iphone_cmRF
print("\n-------------------------------------------------------------------------------")
print("SVM detail")
iphone_cmsvm
```
## Model Selection

The Accuracy shown by the **Random Forest model, using the Recursive Feature Elimination technique**, was the highest. It also showed the best-balanced accuracy on the confusion matrix analysis. However, caution should be taken since all the models tend to classify occurrences to the "Very Positive (5)" class.

## Large dataset prediction
Pre-processing the large dataset
```{r}
# Load the unlabeled production data and align its columns with the
# feature set the chosen model was trained on.
large_df <- read_csv("big_matrix.csv")
large_df$id <- NULL  # drop the row identifier; it is not a predictor

# create new data set with RFE recommended features
large_df <- large_df[,predictors(rfe_results)]
# review outcome
str(large_df)
```

Apply Model on the large dataset
-----------------------------------------------------------------
```{r echo=FALSE}
# predict() on a caret model returns a factor of class labels; the
# original appended `$model` to that factor, which errors with
# "$ operator is invalid for atomic vectors". Extract the model with
# [[ ]] and use the prediction directly.
iphone_predicted <- predict(rf_rfe[["model"]], large_df)
large_df$iphonesentiment <- iphone_predicted
summary(iphone_predicted)
```
## iPhone Sentiments Results
```{r}
# Tabulate predicted class counts and label them with readable category
# names for the pie chart.
iphonesentiment <- summary(iphone_predicted)
iphonesentiment_df <- data.frame(
  "Categorie" = c("Very Negative", "Negative", "Somewhat Negative",
                  "Somewhat Positive", "Positive", "Very Positive"),
  iphonesentiment
)
iphone_sent_data <- iphonesentiment_df[, c('Categorie', 'iphonesentiment')]

# Pies carry no x/y information, so hide both axes entirely.
hidden_axis <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
iphone_pie <- plot_ly(iphone_sent_data, labels = ~Categorie,
                      values = ~iphonesentiment, type = 'pie') %>%
  layout(title = 'iPhone Sentiment - Nov 2019',
         xaxis = hidden_axis,
         yaxis = hidden_axis)

iphone_pie

```
_________________________________________
# Galaxy analysis

Load training dataset for Galaxy labeled sentiment.
```{r echo=FALSE}
galaxyDF <- read_csv("galaxy_smallmatrix_labeled_9d.csv")
```

Explore structure and descriptive statistics from the training datasets
```{r echo=FALSE}
describe_df("Galaxy", galaxyDF)
```

## Labeled sentiment distribution.
```{r}
plot_ly(galaxyDF, x= ~galaxyDF$galaxysentiment, type='histogram')
```
## Feature selection methods
### Features Correlation
Explore correlation between all variables:
```{r echo=FALSE}
# create a new data set and remove features highly correlated with the dependant 
galaxyCOR <- remove_highly_correlated_features(galaxyDF)
paste("Number of original features: ", ncol(galaxyDF))
paste("Number of features after cleanup: ", ncol(galaxyCOR))
plot_correlation(galaxyCOR)
```
### Near Zero Variables
Removing near zero vars:
```{r}
galaxy_nzvMetrics <- nearZeroVar(galaxyCOR, saveMetrics = TRUE)
str(galaxy_nzvMetrics)

galaxyNZV <- remove_nzv(galaxyCOR)

paste("NZV number of features after cleanup: ", ncol(galaxyNZV))
```
### Recursive Feature Elimination (RFE)
```{r}
set.seed(9874568)
# RFE with random forests is expensive, so fit it on a 1000-row random
# sample of the data rather than the full set.
galaxy_sample <- galaxyDF[sample(1:nrow(galaxyDF), 1000, replace=FALSE),]

# Set up rfeControl with randomforest, repeated cross validation and no updates
ctrl <- rfeControl(functions = rfFuncs,
                   method = "repeatedcv",
                   repeats = 5,
                   verbose = FALSE)

# Use rfe and omit the response variable (attribute 59 galaxysentiment)
# sizes = 1:58 asks RFE to evaluate every candidate subset size.
g_rfe_results <- run_in_parallel(rfe, galaxy_sample[,1:58],
                              galaxy_sample$galaxysentiment,
                              sizes=(1:58), rfeControl=ctrl)

# Get results
g_rfe_results

# Plot results
plot(g_rfe_results, type=c("g", "o"))
```

Create a new dataset with the best features found by RFE
```{r}
# create new data set with rfe recommended features
galaxyRFE <- galaxyDF[,predictors(g_rfe_results)]

# add the dependent variable to galaxyRFE
galaxyRFE$galaxysentiment <- galaxyDF$galaxysentiment

# review outcome
str(galaxyRFE)
```
## Models training
### Preprocess label and Data Partition
```{r}
# Classification setup: convert the numeric sentiment label to a factor
# and create a stratified 70/30 train/test split for each candidate
# feature set. Reseeding with 90210 before every createDataPartition()
# call makes all four splits select the same rows.
g_df <- galaxyDF
g_df$galaxysentiment <- as.factor(g_df$galaxysentiment)
plot_ly(g_df, x= ~g_df$galaxysentiment, type='histogram')

set.seed(90210)
g_dataPar <- createDataPartition(g_df$galaxysentiment, p = .70, list = FALSE)
g_train_df <- g_df[g_dataPar,]
g_test_df <- g_df[-g_dataPar,]

#galaxyCOR
galaxyCOR$galaxysentiment <- as.factor(galaxyCOR$galaxysentiment)
set.seed(90210)
g_dataParCOR <- createDataPartition(galaxyCOR$galaxysentiment, p = .70, list = FALSE)
g_train_dfCOR <- galaxyCOR[g_dataParCOR,]
g_test_dfCOR <- galaxyCOR[-g_dataParCOR,]

#galaxyRFE
galaxyRFE$galaxysentiment <- as.factor(galaxyRFE$galaxysentiment)
set.seed(90210)
g_dataParRFE <- createDataPartition(galaxyRFE$galaxysentiment, p = .70, list = FALSE)
g_train_dfRFE <- galaxyRFE[g_dataParRFE,]
g_test_dfRFE <- galaxyRFE[-g_dataParRFE,]

#galaxyNZV
galaxyNZV$galaxysentiment <- as.factor(galaxyNZV$galaxysentiment)
set.seed(90210)
g_dataParNZV <- createDataPartition(galaxyNZV$galaxysentiment, p = .70, list = FALSE)
g_train_dfNZV <- galaxyNZV[g_dataParNZV,]
g_test_dfNZV <- galaxyNZV[-g_dataParNZV,]

```
## Cross Validation Fit Control
```{r}
# cross validation 
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 2)

# Training functions
g_svm_train <- function(dataF, testing_data) {
  #' Fit an SVM (e1071) predicting galaxysentiment and evaluate it on a
  #' hold-out set.
  #' @param dataF Training data.frame containing `galaxysentiment`.
  #' @param testing_data Hold-out data.frame for evaluation.
  #' @return list with the fitted `model` and its `post_resample` metrics.
  library(e1071)
  set.seed(641386945)
  system.time(
    fitted_model <- run_in_parallel(svm, galaxysentiment ~ ., data = dataF)
  )
  holdout_preds <- predict(fitted_model, testing_data)
  holdout_metrics <- postResample(holdout_preds, testing_data$galaxysentiment)
  return(list("model" = fitted_model, "post_resample" = holdout_metrics))
}

g_knn_train <- function(dataF, testing_data) {
  #' Fit a k-nearest-neighbours model (kknn) predicting galaxysentiment
  #' and evaluate it on a hold-out set.
  #' @param dataF Training data.frame containing `galaxysentiment`.
  #' @param testing_data Hold-out data.frame for evaluation.
  #' @return list with the fitted `model` and its `post_resample` metrics.
  library(kknn)
  set.seed(641386945)
  system.time(
    fitted_model <- run_in_parallel(train.kknn, galaxysentiment ~ ., data = dataF)
  )
  holdout_preds <- predict(fitted_model, testing_data)
  holdout_metrics <- postResample(holdout_preds, testing_data$galaxysentiment)
  return(list("model" = fitted_model, "post_resample" = holdout_metrics))
}

g_caret_train <- function(dataF, testing_data, model_name, fitCtrl) {
  #' Train a caret model of the given method for galaxysentiment and
  #' evaluate it on a hold-out set.
  #' @param dataF Training data.frame containing `galaxysentiment`.
  #' @param testing_data Hold-out data.frame for evaluation.
  #' @param model_name caret method string (e.g. 'C5.0', 'rf').
  #' @param fitCtrl trainControl object (cross-validation settings).
  #' @return list with the fitted `model` and its `post_resample` metrics.
  set.seed(641386945)
  system.time(
    fitted_model <- run_in_parallel(train, galaxysentiment ~ ., data = dataF,
                                    method = model_name, trControl = fitCtrl)
  )
  holdout_preds <- predict(fitted_model, testing_data)
  holdout_metrics <- postResample(holdout_preds, testing_data$galaxysentiment)
  return(list("model" = fitted_model, "post_resample" = holdout_metrics))
}
```
### C5.0 Model
```{r}
##### Decision Tree (C5.0) #####
print("C5.0: Full Dataset")
# Train on the 70% training splits; the originals passed the complete
# data sets, leaking the held-out test rows into training.
g_dt_c50 <- g_caret_train(g_train_df, g_test_df, 'C5.0', fitControl)
g_dt_c50["model"]
g_dt_c50["post_resample"]

#Train model with RFE dataset:
print("C5.0: RFE")
g_dt_c50_rfe <- g_caret_train(g_train_dfRFE, g_test_dfRFE, 'C5.0', fitControl)
g_dt_c50_rfe["model"]
g_dt_c50_rfe["post_resample"]

#Train model with NZV dataset:
print("C5.0: NZV")
g_dt_c50_nzv <- g_caret_train(g_train_dfNZV, g_test_dfNZV, 'C5.0', fitControl)
g_dt_c50_nzv["model"]
g_dt_c50_nzv["post_resample"]

#Train model with COR dataset:
print("C5.0: COR")
g_dt_c50_cor <- g_caret_train(g_train_dfCOR, g_test_dfCOR, 'C5.0', fitControl)
g_dt_c50_cor["model"]
g_dt_c50_cor["post_resample"]
```

### Random Forest Model
```{r}
print("Random Forest: Full Dataset")
g_rf <- g_caret_train(g_df, g_test_df, 'rf', fitControl)
g_rf["model"]
g_rf["post_resample"]

print("Random Forest: RFE")
g_rf_rfe <- g_caret_train(galaxyRFE, g_test_dfRFE, 'rf', fitControl)
g_rf_rfe["model"]
g_rf_rfe["post_resample"]

print("Random Forest: NZV")
g_rf_nzv <- g_caret_train(galaxyNZV, g_test_dfNZV, 'rf', fitControl)
g_rf_nzv["model"]
g_rf_nzv["post_resample"]

print("Random Forest: COR")
g_rf_cor <- g_caret_train(galaxyCOR, g_test_dfCOR, 'rf', fitControl)
g_rf_cor["model"]
g_rf_cor["post_resample"]
```


### Support Vector Machine (SVM) Model
```{r}
print("SVM: Full Dataset")
g_svm_train_full <- g_svm_train(g_df, g_test_df)
g_svm_train_full["model"]
g_svm_train_full["post_resample"]

print("SVM: RFE")
g_svm_train_rfe <- g_svm_train(galaxyRFE, g_test_dfRFE)
g_svm_train_rfe["model"]
g_svm_train_rfe["post_resample"]

print("SVM: NZV")
g_svm_train_nzv <- g_svm_train(galaxyNZV, g_test_dfNZV)
g_svm_train_nzv["model"]
g_svm_train_nzv["post_resample"]

print("SVM: COR")
g_svm_train_cor <- g_svm_train(galaxyCOR, g_test_dfCOR)
g_svm_train_cor["model"]
g_svm_train_cor["post_resample"]

```
## K-nearest Neighbors (KNN) Model
```{r}
print("KNN: Full Dataset")
g_knn_train_full <- g_knn_train(g_df, g_test_df)
g_knn_train_full["model"]
g_knn_train_full["post_resample"]

print("KNN: RFE")
g_knn_train_rfe <- g_knn_train(galaxyRFE, g_test_dfRFE)
g_knn_train_rfe["model"]
g_knn_train_rfe["post_resample"]

print("KNN: NZV")
g_knn_train_nzv <- g_knn_train(galaxyNZV, g_test_dfNZV)
g_knn_train_nzv["model"]
g_knn_train_nzv["post_resample"]

print("KNN: COR")
g_knn_train_cor <- g_knn_train(galaxyCOR, g_test_dfCOR)
g_knn_train_cor["model"]
g_knn_train_cor["post_resample"]
```
## Models Performance
Grouped bar chart to evaluate model performance
```{r echo=FALSE}
# Compare Accuracy/Kappa across the four algorithms. The bars mix variants
# (C5.0 and RF: full data; SVM: NZV; KNN: COR) — NOTE(review): presumably
# each model's best candidate; confirm this selection is intentional.
g_model_name <- c(rep("C5.0" , 2) , rep("RF" , 2) , rep("SVM" , 2) , rep("KNN" , 2) )
metric <- rep(c("Accuracy" , "Kappa") , 4)
value <- c(g_dt_c50["post_resample"]$post_resample, g_rf["post_resample"]$post_resample, g_svm_train_nzv["post_resample"]$post_resample, g_knn_train_cor["post_resample"]$post_resample)
plot_data <- data.frame(g_model_name,metric,value)

# Side-by-side bars: one pair (Accuracy, Kappa) per model.
ggplot(plot_data, aes(fill=metric, y=value, x=g_model_name)) + 
  geom_bar(position="dodge", stat="identity") +
  xlab("Model") + 
  ggtitle("Galaxy Models Comparison")

```

```{r}
# Creating confusion matrices.
# The original called predict(x["model"], ...)$model: x["model"] is a
# one-element LIST (predict() cannot dispatch on it), and `$model` on the
# resulting factor is invalid for atomic vectors — use x[["model"]].
# Each model is evaluated on the test split matching its training feature
# set (all splits share seed 90210, so the rows are identical).
g_cm_dt <- confusionMatrix(predict(g_dt_c50[["model"]], g_test_df), g_test_df$galaxysentiment)
plot_confusion_matrix(g_cm_dt, "C5.0")

g_cmsvm <- confusionMatrix(predict(g_svm_train_nzv[["model"]], g_test_dfNZV), g_test_dfNZV$galaxysentiment) 
plot_confusion_matrix(g_cmsvm, "SVM")

g_cmRF <- confusionMatrix(predict(g_rf[["model"]], g_test_df), g_test_df$galaxysentiment) 
plot_confusion_matrix(g_cmRF, "Random Forest")

g_cmknn <- confusionMatrix(predict(g_knn_train_cor[["model"]], g_test_dfCOR), g_test_dfCOR$galaxysentiment) 
plot_confusion_matrix(g_cmknn, "KNN")

print("C5.0 detail")
g_cm_dt
print("\n-------------------------------------------------------------------------------")
print("RF detail")
g_cmRF
print("\n-------------------------------------------------------------------------------")
print("SVM detail")
g_cmsvm
print("\n-------------------------------------------------------------------------------")
print("KNN detail")
g_cmknn
```
## Model Selection

The accuracy and kappa shown by the **Random Forest model, using all the features from the dataset,** were the best.

## Large dataset prediction
Pre-processing the large dataset
```{r}
# Load the unlabeled production data for the Galaxy predictions.
g_large_df <- read_csv("big_matrix.csv")
g_large_df$id <- NULL  # drop the row identifier; it is not a predictor

# No features need to be removed: the selected Galaxy model was trained
# on the full feature set.
# review outcome
str(g_large_df)
```

Apply Model on the large dataset
```{r}
# Score the unlabeled data with the fitted Random Forest model and
# inspect the predicted class distribution.
g_rf_model <- g_rf[["model"]]
g_large_df$galaxysentiment <- predict(g_rf_model, g_large_df)
head(g_large_df$galaxysentiment, 5)
summary(g_large_df$galaxysentiment)
```
## Galaxy Sentiments Results
```{r}
# Tabulate predicted class counts and label them with readable category
# names for the pie chart.
galaxysentiment <- summary(g_large_df$galaxysentiment)
galaxysentiment_df <- data.frame(
  "Categorie" = c("Very Negative", "Negative", "Somewhat Negative",
                  "Somewhat Positive", "Positive", "Very Positive"),
  galaxysentiment
)
galaxy_sent_data <- galaxysentiment_df[, c('Categorie', 'galaxysentiment')]

# Pies carry no x/y information, so hide both axes entirely.
hidden_axis <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
galaxy_pie <- plot_ly(galaxy_sent_data, labels = ~Categorie,
                      values = ~galaxysentiment, type = 'pie') %>%
  layout(title = 'Galaxy Sentiment - Nov 2019',
         xaxis = hidden_axis,
         yaxis = hidden_axis)

galaxy_pie

```
```{r}
# Side-by-side donut-style comparison: Galaxy occupies the left domain,
# iPhone the right, within a single plotly figure.
bare_axis <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = TRUE)
p <- plot_ly() %>%
  add_pie(data = galaxy_sent_data, labels = ~Categorie, values = ~galaxysentiment,
          name = "Galaxy", domain = list(x = c(0, 0.4), y = c(0.4, 1))) %>%
  add_pie(iphone_sent_data, labels = ~Categorie, values = ~iphonesentiment,
          name = "iPhone", domain = list(x = c(0.6, 1), y = c(0.4, 1))) %>%
  layout(title = "Galaxy (left) vs iPhone (right) Sentiments ", showlegend = F,
         xaxis = bare_axis,
         yaxis = bare_axis)

p
```

--EOF--